Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
      3 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
      4 
      5 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { ; v16f32: insert a loaded float at index 1, then %y at index 14
      6 ; KNL-LABEL: test1:
      7 ; KNL:       ## BB#0:
      8 ; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
      9 ; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
     10 ; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
     11 ; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
     12 ; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
     13 ; KNL-NEXT:    retq
     14 ;
     15 ; SKX-LABEL: test1:
     16 ; SKX:       ## BB#0:
     17 ; SKX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
     18 ; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
     19 ; SKX-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
     20 ; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
     21 ; SKX-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
     22 ; SKX-NEXT:    retq
     23   %rrr = load float, float* %br ; scalar from memory that becomes element 1
     24   %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
     25   %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14 ; element 14 = float 2 of 128-bit chunk 3 (14 = 3*4 + 2)
     26   ret <16 x float> %rrr3
     27 }
     28 
     29 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { ; v8f64: insert a loaded double at index 1, then %y at index 6
     30 ; KNL-LABEL: test2:
     31 ; KNL:       ## BB#0:
     32 ; KNL-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
     33 ; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
     34 ; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
     35 ; KNL-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
     36 ; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
     37 ; KNL-NEXT:    retq
     38 ;
     39 ; SKX-LABEL: test2:
     40 ; SKX:       ## BB#0:
     41 ; SKX-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
     42 ; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
     43 ; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
     44 ; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
     45 ; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
     46 ; SKX-NEXT:    retq
     47   %rrr = load double, double* %br ; scalar from memory that becomes element 1
     48   %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
     49   %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6 ; element 6 = double 0 of 128-bit chunk 3 (6 = 3*2 + 0)
     50   ret <8 x double> %rrr3
     51 }
     52 
     53 define <16 x float> @test3(<16 x float> %x) nounwind { ; v16f32: copy element 4 of %x into element 1 of the same vector
     54 ; KNL-LABEL: test3:
     55 ; KNL:       ## BB#0:
     56 ; KNL-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
     57 ; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
     58 ; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
     59 ; KNL-NEXT:    retq
     60 ;
     61 ; SKX-LABEL: test3:
     62 ; SKX:       ## BB#0:
     63 ; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
     64 ; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
     65 ; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
     66 ; SKX-NEXT:    retq
     67   %eee = extractelement <16 x float> %x, i32 4 ; element 4 = float 0 of 128-bit chunk 1
     68   %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
     69   ret <16 x float> %rrr2
     70 }
     71 
     72 define <8 x i64> @test4(<8 x i64> %x) nounwind { ; v8i64: copy element 4 of %x into element 1 of the same vector
     73 ; KNL-LABEL: test4:
     74 ; KNL:       ## BB#0:
     75 ; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
     76 ; KNL-NEXT:    vmovq %xmm1, %rax
     77 ; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
     78 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
     79 ; KNL-NEXT:    retq
     80 ;
     81 ; SKX-LABEL: test4:
     82 ; SKX:       ## BB#0:
     83 ; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
     84 ; SKX-NEXT:    vmovq %xmm1, %rax
     85 ; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
     86 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
     87 ; SKX-NEXT:    retq
     88   %eee = extractelement <8 x i64> %x, i32 4 ; element 4 = qword 0 of 128-bit chunk 2
     89   %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
     90   ret <8 x i64> %rrr2
     91 }
     92 
     93 define i32 @test5(<4 x float> %x) nounwind { ; v4f32: return the bit pattern of element 3 as an i32
     94 ; KNL-LABEL: test5:
     95 ; KNL:       ## BB#0:
     96 ; KNL-NEXT:    vextractps $3, %xmm0, %eax
     97 ; KNL-NEXT:    retq
     98 ;
     99 ; SKX-LABEL: test5:
    100 ; SKX:       ## BB#0:
    101 ; SKX-NEXT:    vextractps $3, %xmm0, %eax
    102 ; SKX-NEXT:    retq
    103   %ef = extractelement <4 x float> %x, i32 3
    104   %ei = bitcast float %ef to i32 ; reinterpret the float bits, no numeric conversion
    105   ret i32 %ei
    106 }
    107 
    108 define void @test6(<4 x float> %x, float* %out) nounwind { ; v4f32: store element 3 directly to memory
    109 ; KNL-LABEL: test6:
    110 ; KNL:       ## BB#0:
    111 ; KNL-NEXT:    vextractps $3, %xmm0, (%rdi)
    112 ; KNL-NEXT:    retq
    113 ;
    114 ; SKX-LABEL: test6:
    115 ; SKX:       ## BB#0:
    116 ; SKX-NEXT:    vextractps $3, %xmm0, (%rdi)
    117 ; SKX-NEXT:    retq
    118   %ef = extractelement <4 x float> %x, i32 3
    119   store float %ef, float* %out, align 4 ; the extract and the store are expected to fold into one instruction
    120   ret void
    121 }
    122 
    123 define float @test7(<16 x float> %x, i32 %ind) nounwind { ; v16f32: extract at a runtime (non-constant) index
    124 ; KNL-LABEL: test7:
    125 ; KNL:       ## BB#0:
    126 ; KNL-NEXT:    vmovd %edi, %xmm1
    127 ; KNL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
    128 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
    129 ; KNL-NEXT:    retq
    130 ;
    131 ; SKX-LABEL: test7:
    132 ; SKX:       ## BB#0:
    133 ; SKX-NEXT:    vmovd %edi, %xmm1
    134 ; SKX-NEXT:    vpermps %zmm0, %zmm1, %zmm0
    135 ; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
    136 ; SKX-NEXT:    retq
    137   %e = extractelement <16 x float> %x, i32 %ind ; index is the function argument %ind, not a constant
    138   ret float %e
    139 }
    140 
    141 define double @test8(<8 x double> %x, i32 %ind) nounwind { ; v8f64: extract at a runtime (non-constant) index
    142 ; KNL-LABEL: test8:
    143 ; KNL:       ## BB#0:
    144 ; KNL-NEXT:    movslq %edi, %rax
    145 ; KNL-NEXT:    vmovq %rax, %xmm1
    146 ; KNL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    147 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
    148 ; KNL-NEXT:    retq
    149 ;
    150 ; SKX-LABEL: test8:
    151 ; SKX:       ## BB#0:
    152 ; SKX-NEXT:    movslq %edi, %rax
    153 ; SKX-NEXT:    vmovq %rax, %xmm1
    154 ; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    155 ; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
    156 ; SKX-NEXT:    retq
    157   %e = extractelement <8 x double> %x, i32 %ind ; index is the function argument %ind, not a constant
    158   ret double %e
    159 }
    160 
    161 define float @test9(<8 x float> %x, i32 %ind) nounwind { ; v8f32 (256-bit): extract at a runtime (non-constant) index
    162 ; KNL-LABEL: test9:
    163 ; KNL:       ## BB#0:
    164 ; KNL-NEXT:    vmovd %edi, %xmm1
    165 ; KNL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    166 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
    167 ; KNL-NEXT:    retq
    168 ;
    169 ; SKX-LABEL: test9:
    170 ; SKX:       ## BB#0:
    171 ; SKX-NEXT:    vmovd %edi, %xmm1
    172 ; SKX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    173 ; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
    174 ; SKX-NEXT:    retq
    175   %e = extractelement <8 x float> %x, i32 %ind ; index is the function argument %ind, not a constant
    176   ret float %e
    177 }
    178 
    179 define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ; v16i32: extract at a runtime (non-constant) index
    180 ; KNL-LABEL: test10:
    181 ; KNL:       ## BB#0:
    182 ; KNL-NEXT:    vmovd %edi, %xmm1
    183 ; KNL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
    184 ; KNL-NEXT:    vmovd %xmm0, %eax
    185 ; KNL-NEXT:    retq
    186 ;
    187 ; SKX-LABEL: test10:
    188 ; SKX:       ## BB#0:
    189 ; SKX-NEXT:    vmovd %edi, %xmm1
    190 ; SKX-NEXT:    vpermd %zmm0, %zmm1, %zmm0
    191 ; SKX-NEXT:    vmovd %xmm0, %eax
    192 ; SKX-NEXT:    retq
    193   %e = extractelement <16 x i32> %x, i32 %ind ; index is the function argument %ind, not a constant
    194   ret i32 %e
    195 }
    196 
    197 define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { ; branch on bit 4 of a v16i1 compare result
    198 ; KNL-LABEL: test11:
    199 ; KNL:       ## BB#0:
    200 ; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
    201 ; KNL-NEXT:    kshiftlw $11, %k0, %k0
    202 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    203 ; KNL-NEXT:    kmovw %k0, %eax
    204 ; KNL-NEXT:    testb %al, %al
    205 ; KNL-NEXT:    je LBB10_2
    206 ; KNL-NEXT:  ## BB#1: ## %A
    207 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    208 ; KNL-NEXT:    retq
    209 ; KNL-NEXT:  LBB10_2: ## %B
    210 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    211 ; KNL-NEXT:    retq
    212 ;
    213 ; SKX-LABEL: test11:
    214 ; SKX:       ## BB#0:
    215 ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
    216 ; SKX-NEXT:    kshiftlw $11, %k0, %k0
    217 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
    218 ; SKX-NEXT:    kmovw %k0, %eax
    219 ; SKX-NEXT:    testb %al, %al
    220 ; SKX-NEXT:    je LBB10_2
    221 ; SKX-NEXT:  ## BB#1: ## %A
    222 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
    223 ; SKX-NEXT:    retq
    224 ; SKX-NEXT:  LBB10_2: ## %B
    225 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    226 ; SKX-NEXT:    retq
    227   %cmp_res = icmp ult <16 x i32> %a, %b ; unsigned less-than, one i1 per lane
    228   %ia = extractelement <16 x i1> %cmp_res, i32 4 ; the shift pair 11 then 15 in the checks isolates bit 4 of the 16-bit mask
    229   br i1 %ia, label %A, label %B
    230   A:
    231     ret <16 x i32>%b
    232   B:
    233    %c = add <16 x i32>%b, %a
    234    ret <16 x i32>%c
    235 }
    236 
    237 define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) { ; select between two scalars on bit 0 of a v16i1 compare of v16i64 inputs
    238 ; KNL-LABEL: test12:
    239 ; KNL:       ## BB#0:
    240 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
    241 ; KNL-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
    242 ; KNL-NEXT:    kunpckbw %k0, %k1, %k0
    243 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
    244 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    245 ; KNL-NEXT:    kmovw %k0, %eax
    246 ; KNL-NEXT:    testb %al, %al
    247 ; KNL-NEXT:    cmoveq %rsi, %rdi
    248 ; KNL-NEXT:    movq %rdi, %rax
    249 ; KNL-NEXT:    retq
    250 ;
    251 ; SKX-LABEL: test12:
    252 ; SKX:       ## BB#0:
    253 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
    254 ; SKX-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
    255 ; SKX-NEXT:    kunpckbw %k0, %k1, %k0
    256 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
    257 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
    258 ; SKX-NEXT:    kmovw %k0, %eax
    259 ; SKX-NEXT:    testb %al, %al
    260 ; SKX-NEXT:    cmoveq %rsi, %rdi
    261 ; SKX-NEXT:    movq %rdi, %rax
    262 ; SKX-NEXT:    retq
    263   %cmpvector_func.i = icmp slt <16 x i64> %a, %b ; 16 x i64 spans two 512-bit registers, hence two compares in the checks
    264   %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
    265   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
    266   ret i64 %res
    267 }
    268 
    269 define i16 @test13(i32 %a, i32 %b) { ; insert a scalar compare result into bit 0 of a constant v16i1 and return the mask as i16
    270 ; KNL-LABEL: test13:
    271 ; KNL:       ## BB#0:
    272 ; KNL-NEXT:    cmpl %esi, %edi
    273 ; KNL-NEXT:    setb %al
    274 ; KNL-NEXT:    kmovw %eax, %k0
    275 ; KNL-NEXT:    movw $-4, %ax
    276 ; KNL-NEXT:    kmovw %eax, %k1
    277 ; KNL-NEXT:    korw %k0, %k1, %k0
    278 ; KNL-NEXT:    kmovw %k0, %eax
    279 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    280 ; KNL-NEXT:    retq
    281 ;
    282 ; SKX-LABEL: test13:
    283 ; SKX:       ## BB#0:
    284 ; SKX-NEXT:    cmpl %esi, %edi
    285 ; SKX-NEXT:    setb %al
    286 ; SKX-NEXT:    kmovw %eax, %k0
    287 ; SKX-NEXT:    movw $-4, %ax
    288 ; SKX-NEXT:    kmovw %eax, %k1
    289 ; SKX-NEXT:    korw %k0, %k1, %k0
    290 ; SKX-NEXT:    kmovw %k0, %eax
    291 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    292 ; SKX-NEXT:    retq
    293   %cmp_res = icmp ult i32 %a, %b
    294   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
    295   %res = bitcast <16 x i1> %maskv to i16 ; the constant's bits 2..15 are true and bit 1 is false, matching the -4 (0xFFFC) immediate in the checks
    296   ret i16 %res
    297 }
    298 
    299 define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; select between two scalars on bit 4 of a v8i1 compare result
    300 ; KNL-LABEL: test14:
    301 ; KNL:       ## BB#0:
    302 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
    303 ; KNL-NEXT:    kshiftlw $11, %k0, %k0
    304 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    305 ; KNL-NEXT:    kmovw %k0, %eax
    306 ; KNL-NEXT:    testb %al, %al
    307 ; KNL-NEXT:    cmoveq %rsi, %rdi
    308 ; KNL-NEXT:    movq %rdi, %rax
    309 ; KNL-NEXT:    retq
    310 ;
    311 ; SKX-LABEL: test14:
    312 ; SKX:       ## BB#0:
    313 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
    314 ; SKX-NEXT:    kshiftlb $3, %k0, %k0
    315 ; SKX-NEXT:    kshiftrb $7, %k0, %k0
    316 ; SKX-NEXT:    kmovw %k0, %eax
    317 ; SKX-NEXT:    testb %al, %al
    318 ; SKX-NEXT:    cmoveq %rsi, %rdi
    319 ; SKX-NEXT:    movq %rdi, %rax
    320 ; SKX-NEXT:    retq
    321   %cmpvector_func.i = icmp slt <8 x i64> %a, %b
    322   %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4 ; the two check blocks isolate bit 4 with 16-bit and 8-bit mask shifts respectively
    323   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
    324   ret i64 %res
    325 }
    326 
    327 define i16 @test15(i1 *%addr) { ; insert a loaded i1 into element 10 of an undef v16i1 and return the mask as i16
    328 ; KNL-LABEL: test15:
    329 ; KNL:       ## BB#0:
    330 ; KNL-NEXT:    movb (%rdi), %al
    331 ; KNL-NEXT:    xorl %ecx, %ecx
    332 ; KNL-NEXT:    testb %al, %al
    333 ; KNL-NEXT:    movw $-1, %ax
    334 ; KNL-NEXT:    cmovew %cx, %ax
    335 ; KNL-NEXT:    retq
    336 ;
    337 ; SKX-LABEL: test15:
    338 ; SKX:       ## BB#0:
    339 ; SKX-NEXT:    movb (%rdi), %al
    340 ; SKX-NEXT:    xorl %ecx, %ecx
    341 ; SKX-NEXT:    testb %al, %al
    342 ; SKX-NEXT:    movw $-1, %ax
    343 ; SKX-NEXT:    cmovew %cx, %ax
    344 ; SKX-NEXT:    retq
    345   %x = load i1 , i1 * %addr, align 1
    346   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10 ; every other element is undef, so the checks show an all-ones-or-zero result
    347   %x2 = bitcast <16 x i1>%x1 to i16
    348   ret i16 %x2
    349 }
    350 
    351 define i16 @test16(i1 *%addr, i16 %a) { ; insert a loaded i1 into element 10 of a v16i1 built from the i16 argument
    352 ; KNL-LABEL: test16:
    353 ; KNL:       ## BB#0:
    354 ; KNL-NEXT:    movzbl (%rdi), %eax
    355 ; KNL-NEXT:    andl $1, %eax
    356 ; KNL-NEXT:    kmovw %eax, %k0
    357 ; KNL-NEXT:    kmovw %esi, %k1
    358 ; KNL-NEXT:    kshiftlw $10, %k0, %k0
    359 ; KNL-NEXT:    korw %k0, %k1, %k0
    360 ; KNL-NEXT:    kmovw %k0, %eax
    361 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    362 ; KNL-NEXT:    retq
    363 ;
    364 ; SKX-LABEL: test16:
    365 ; SKX:       ## BB#0:
    366 ; SKX-NEXT:    movzbl (%rdi), %eax
    367 ; SKX-NEXT:    andl $1, %eax
    368 ; SKX-NEXT:    kmovd %eax, %k0
    369 ; SKX-NEXT:    kmovw %esi, %k1
    370 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
    371 ; SKX-NEXT:    korw %k0, %k1, %k0
    372 ; SKX-NEXT:    kmovw %k0, %eax
    373 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    374 ; SKX-NEXT:    retq
    375   %x = load i1 , i1 * %addr, align 128
    376   %a1 = bitcast i16 %a to <16 x i1> ; reinterpret the scalar as a 16-lane mask
    377   %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10 ; the checks shift the loaded bit left by 10 and OR it into the mask
    378   %x2 = bitcast <16 x i1>%x1 to i16
    379   ret i16 %x2
    380 }
    381 
    382 define i8 @test17(i1 *%addr, i8 %a) { ; insert a loaded i1 into element 4 of a v8i1 built from the i8 argument
    383 ; KNL-LABEL: test17:
    384 ; KNL:       ## BB#0:
    385 ; KNL-NEXT:    movzbl (%rdi), %eax
    386 ; KNL-NEXT:    andl $1, %eax
    387 ; KNL-NEXT:    kmovw %eax, %k0
    388 ; KNL-NEXT:    kmovw %esi, %k1
    389 ; KNL-NEXT:    kshiftlw $4, %k0, %k0
    390 ; KNL-NEXT:    korw %k0, %k1, %k0
    391 ; KNL-NEXT:    kmovw %k0, %eax
    392 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    393 ; KNL-NEXT:    retq
    394 ;
    395 ; SKX-LABEL: test17:
    396 ; SKX:       ## BB#0:
    397 ; SKX-NEXT:    movzbl (%rdi), %eax
    398 ; SKX-NEXT:    andl $1, %eax
    399 ; SKX-NEXT:    kmovd %eax, %k0
    400 ; SKX-NEXT:    kmovb %esi, %k1
    401 ; SKX-NEXT:    kshiftlb $4, %k0, %k0
    402 ; SKX-NEXT:    korb %k0, %k1, %k0
    403 ; SKX-NEXT:    kmovb %k0, %eax
    404 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    405 ; SKX-NEXT:    retq
    406   %x = load i1 , i1 * %addr, align 128
    407   %a1 = bitcast i8 %a to <8 x i1> ; reinterpret the scalar as an 8-lane mask
    408   %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4 ; the checks shift the loaded bit left by 4 and OR it into the mask
    409   %x2 = bitcast <8 x i1>%x1 to i8
    410   ret i8 %x2
    411 }
    412 
    413 define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) { ; v8i64: return element 1, store element 3 through %dst
    414 ; KNL-LABEL: extract_v8i64:
    415 ; KNL:       ## BB#0:
    416 ; KNL-NEXT:    vpextrq $1, %xmm0, %rax
    417 ; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    418 ; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
    419 ; KNL-NEXT:    retq
    420 ;
    421 ; SKX-LABEL: extract_v8i64:
    422 ; SKX:       ## BB#0:
    423 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax
    424 ; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
    425 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    426 ; SKX-NEXT:    retq
    427   %r1 = extractelement <8 x i64> %x, i32 1 ; lives in the low 128 bits
    428   %r2 = extractelement <8 x i64> %x, i32 3 ; qword 1 of 128-bit chunk 1
    429   store i64 %r2, i64* %dst, align 1
    430   ret i64 %r1
    431 }
    432 
    433 define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) { ; v4i64: return element 1, store element 3 through %dst
    434 ; KNL-LABEL: extract_v4i64:
    435 ; KNL:       ## BB#0:
    436 ; KNL-NEXT:    vpextrq $1, %xmm0, %rax
    437 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    438 ; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
    439 ; KNL-NEXT:    retq
    440 ;
    441 ; SKX-LABEL: extract_v4i64:
    442 ; SKX:       ## BB#0:
    443 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax
    444 ; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
    445 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    446 ; SKX-NEXT:    retq
    447   %r1 = extractelement <4 x i64> %x, i32 1 ; lives in the low 128 bits
    448   %r2 = extractelement <4 x i64> %x, i32 3 ; qword 1 of the upper 128-bit half
    449   store i64 %r2, i64* %dst, align 1
    450   ret i64 %r1
    451 }
    452 
    453 define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) { ; v2i64: return element 0, store element 1 through %dst
    454 ; KNL-LABEL: extract_v2i64:
    455 ; KNL:       ## BB#0:
    456 ; KNL-NEXT:    vmovq %xmm0, %rax
    457 ; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
    458 ; KNL-NEXT:    retq
    459 ;
    460 ; SKX-LABEL: extract_v2i64:
    461 ; SKX:       ## BB#0:
    462 ; SKX-NEXT:    vmovq %xmm0, %rax
    463 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    464 ; SKX-NEXT:    retq
    465   %r1 = extractelement <2 x i64> %x, i32 0 ; element 0 needs only a plain move in the checks
    466   %r2 = extractelement <2 x i64> %x, i32 1
    467   store i64 %r2, i64* %dst, align 1
    468   ret i64 %r1
    469 }
    470 
    471 define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) { ; v16i32: return element 1, store element 5 through %dst
    472 ; KNL-LABEL: extract_v16i32:
    473 ; KNL:       ## BB#0:
    474 ; KNL-NEXT:    vpextrd $1, %xmm0, %eax
    475 ; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    476 ; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
    477 ; KNL-NEXT:    retq
    478 ;
    479 ; SKX-LABEL: extract_v16i32:
    480 ; SKX:       ## BB#0:
    481 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    482 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    483 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
    484 ; SKX-NEXT:    retq
    485   %r1 = extractelement <16 x i32> %x, i32 1 ; lives in the low 128 bits
    486   %r2 = extractelement <16 x i32> %x, i32 5 ; dword 1 of 128-bit chunk 1
    487   store i32 %r2, i32* %dst, align 1
    488   ret i32 %r1
    489 }
    490 
    491 define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) { ; v8i32: return element 1, store element 5 through %dst
    492 ; KNL-LABEL: extract_v8i32:
    493 ; KNL:       ## BB#0:
    494 ; KNL-NEXT:    vpextrd $1, %xmm0, %eax
    495 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    496 ; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
    497 ; KNL-NEXT:    retq
    498 ;
    499 ; SKX-LABEL: extract_v8i32:
    500 ; SKX:       ## BB#0:
    501 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    502 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
    503 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
    504 ; SKX-NEXT:    retq
    505   %r1 = extractelement <8 x i32> %x, i32 1 ; lives in the low 128 bits
    506   %r2 = extractelement <8 x i32> %x, i32 5 ; dword 1 of the upper 128-bit half
    507   store i32 %r2, i32* %dst, align 1
    508   ret i32 %r1
    509 }
    510 
    511 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) { ; v4i32: return element 1, store element 3 through %dst
    512 ; KNL-LABEL: extract_v4i32:
    513 ; KNL:       ## BB#0:
    514 ; KNL-NEXT:    vpextrd $1, %xmm0, %eax
    515 ; KNL-NEXT:    vpextrd $3, %xmm0, (%rdi)
    516 ; KNL-NEXT:    retq
    517 ;
    518 ; SKX-LABEL: extract_v4i32:
    519 ; SKX:       ## BB#0:
    520 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    521 ; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
    522 ; SKX-NEXT:    retq
    523   %r1 = extractelement <4 x i32> %x, i32 1
    524   %r2 = extractelement <4 x i32> %x, i32 3 ; both elements sit in one 128-bit register, so no sub-vector extract is checked
    525   store i32 %r2, i32* %dst, align 1
    526   ret i32 %r1
    527 }
    528 
    529 define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) { ; v32i16: return element 1, store element 9 through %dst
    530 ; KNL-LABEL: extract_v32i16:
    531 ; KNL:       ## BB#0:
    532 ; KNL-NEXT:    vpextrw $1, %xmm0, %eax
    533 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    534 ; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
    535 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    536 ; KNL-NEXT:    retq
    537 ;
    538 ; SKX-LABEL: extract_v32i16:
    539 ; SKX:       ## BB#0:
    540 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    541 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    542 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
    543 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    544 ; SKX-NEXT:    retq
    545   %r1 = extractelement <32 x i16> %x, i32 1 ; lives in the low 128 bits
    546   %r2 = extractelement <32 x i16> %x, i32 9 ; word 1 of 128-bit chunk 1
    547   store i16 %r2, i16* %dst, align 1
    548   ret i16 %r1
    549 }
    550 
    551 define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) { ; v16i16: return element 1, store element 9 through %dst
    552 ; KNL-LABEL: extract_v16i16:
    553 ; KNL:       ## BB#0:
    554 ; KNL-NEXT:    vpextrw $1, %xmm0, %eax
    555 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    556 ; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
    557 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    558 ; KNL-NEXT:    retq
    559 ;
    560 ; SKX-LABEL: extract_v16i16:
    561 ; SKX:       ## BB#0:
    562 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    563 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
    564 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
    565 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    566 ; SKX-NEXT:    retq
    567   %r1 = extractelement <16 x i16> %x, i32 1 ; lives in the low 128 bits
    568   %r2 = extractelement <16 x i16> %x, i32 9 ; word 1 of the upper 128-bit half
    569   store i16 %r2, i16* %dst, align 1
    570   ret i16 %r1
    571 }
    572 
    573 define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) { ; v8i16: return element 1, store element 3 through %dst
    574 ; KNL-LABEL: extract_v8i16:
    575 ; KNL:       ## BB#0:
    576 ; KNL-NEXT:    vpextrw $1, %xmm0, %eax
    577 ; KNL-NEXT:    vpextrw $3, %xmm0, (%rdi)
    578 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    579 ; KNL-NEXT:    retq
    580 ;
    581 ; SKX-LABEL: extract_v8i16:
    582 ; SKX:       ## BB#0:
    583 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    584 ; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
    585 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    586 ; SKX-NEXT:    retq
    587   %r1 = extractelement <8 x i16> %x, i32 1
    588   %r2 = extractelement <8 x i16> %x, i32 3 ; both elements sit in one 128-bit register, so no sub-vector extract is checked
    589   store i16 %r2, i16* %dst, align 1
    590   ret i16 %r1
    591 }
    592 
    593 define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) { ; v64i8: return element 1, store element 17 through %dst
    594 ; KNL-LABEL: extract_v64i8:
    595 ; KNL:       ## BB#0:
    596 ; KNL-NEXT:    vpextrb $1, %xmm0, %eax
    597 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    598 ; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
    599 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    600 ; KNL-NEXT:    retq
    601 ;
    602 ; SKX-LABEL: extract_v64i8:
    603 ; SKX:       ## BB#0:
    604 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    605 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    606 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
    607 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    608 ; SKX-NEXT:    retq
    609   %r1 = extractelement <64 x i8> %x, i32 1 ; lives in the low 128 bits
    610   %r2 = extractelement <64 x i8> %x, i32 17 ; byte 1 of 128-bit chunk 1
    611   store i8 %r2, i8* %dst, align 1
    612   ret i8 %r1
    613 }
    614 
    615 define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) { ; v32i8: return element 1, store element 17 through %dst
    616 ; KNL-LABEL: extract_v32i8:
    617 ; KNL:       ## BB#0:
    618 ; KNL-NEXT:    vpextrb $1, %xmm0, %eax
    619 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    620 ; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
    621 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    622 ; KNL-NEXT:    retq
    623 ;
    624 ; SKX-LABEL: extract_v32i8:
    625 ; SKX:       ## BB#0:
    626 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    627 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
    628 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
    629 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    630 ; SKX-NEXT:    retq
    631   %r1 = extractelement <32 x i8> %x, i32 1 ; lives in the low 128 bits
    632   %r2 = extractelement <32 x i8> %x, i32 17 ; byte 1 of the upper 128-bit half
    633   store i8 %r2, i8* %dst, align 1
    634   ret i8 %r1
    635 }
    636 
    637 define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) { ; v16i8: return element 1, store element 3 through %dst
    638 ; KNL-LABEL: extract_v16i8:
    639 ; KNL:       ## BB#0:
    640 ; KNL-NEXT:    vpextrb $1, %xmm0, %eax
    641 ; KNL-NEXT:    vpextrb $3, %xmm0, (%rdi)
    642 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    643 ; KNL-NEXT:    retq
    644 ;
    645 ; SKX-LABEL: extract_v16i8:
    646 ; SKX:       ## BB#0:
    647 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    648 ; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
    649 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    650 ; SKX-NEXT:    retq
    651   %r1 = extractelement <16 x i8> %x, i32 1
    652   %r2 = extractelement <16 x i8> %x, i32 3 ; both elements sit in one 128-bit register, so no sub-vector extract is checked
    653   store i8 %r2, i8* %dst, align 1
    654   ret i8 %r1
    655 }
    656 
    657 define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) { ; v8i64: insert a loaded i64 at index 1, then %y at index 3
    658 ; KNL-LABEL: insert_v8i64:
    659 ; KNL:       ## BB#0:
    660 ; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    661 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    662 ; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    663 ; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    664 ; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    665 ; KNL-NEXT:    retq
    666 ;
    667 ; SKX-LABEL: insert_v8i64:
    668 ; SKX:       ## BB#0:
    669 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    670 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
    671 ; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
    672 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    673 ; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
    674 ; SKX-NEXT:    retq
    675   %val = load i64, i64* %ptr ; the checks fold this load into the first insert
    676   %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
    677   %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3 ; qword 1 of 128-bit chunk 1
    678   ret <8 x i64> %r2
    679 }
    680 
    681 define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) { ; v4i64: insert a loaded i64 at index 1, then %y at index 3
    682 ; KNL-LABEL: insert_v4i64:
    683 ; KNL:       ## BB#0:
    684 ; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    685 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    686 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    687 ; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    688 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    689 ; KNL-NEXT:    retq
    690 ;
    691 ; SKX-LABEL: insert_v4i64:
    692 ; SKX:       ## BB#0:
    693 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    694 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    695 ; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm1
    696 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    697 ; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
    698 ; SKX-NEXT:    retq
    699   %val = load i64, i64* %ptr ; the checks fold this load into the first insert
    700   %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
    701   %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3 ; qword 1 of the upper 128-bit half
    702   ret <4 x i64> %r2
    703 }
    704 
    705 define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) { ; v2i64: insert a loaded i64 at index 1, then %y at index 3 (see review note below)
    706 ; KNL-LABEL: insert_v2i64:
    707 ; KNL:       ## BB#0:
    708 ; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
    709 ; KNL-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
    710 ; KNL-NEXT:    retq
    711 ;
    712 ; SKX-LABEL: insert_v2i64:
    713 ; SKX:       ## BB#0:
    714 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
    715 ; SKX-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
    716 ; SKX-NEXT:    retq
    717   %val = load i64, i64* %ptr ; the checks fold this load into the first insert
    718   %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
    719   %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3 ; NOTE(review): index 3 is out of range for a 2-element vector (valid indices are 0 and 1), so this result is not well defined per the LangRef; presumably index 0 was intended. Changing it requires regenerating the assertions with update_llc_test_checks.py.
    720   ret <2 x i64> %r2
    721 }
    722 
    723 define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) { ; v16i32: insert a loaded i32 at index 1, then %y at index 5
    724 ; KNL-LABEL: insert_v16i32:
    725 ; KNL:       ## BB#0:
    726 ; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    727 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    728 ; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    729 ; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    730 ; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    731 ; KNL-NEXT:    retq
    732 ;
    733 ; SKX-LABEL: insert_v16i32:
    734 ; SKX:       ## BB#0:
    735 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    736 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    737 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    738 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    739 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    740 ; SKX-NEXT:    retq
    741   %val = load i32, i32* %ptr ; the checks fold this load into the first insert
    742   %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
    743   %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5 ; dword 1 of 128-bit chunk 1
    744   ret <16 x i32> %r2
    745 }
    746 
    747 define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) { ; v8i32: insert a loaded i32 at index 1, then %y at index 5
    748 ; KNL-LABEL: insert_v8i32:
    749 ; KNL:       ## BB#0:
    750 ; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    751 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    752 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    753 ; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    754 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    755 ; KNL-NEXT:    retq
    756 ;
    757 ; SKX-LABEL: insert_v8i32:
    758 ; SKX:       ## BB#0:
    759 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    760 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    761 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
    762 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    763 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    764 ; SKX-NEXT:    retq
    765   %val = load i32, i32* %ptr ; the checks fold this load into the first insert
    766   %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
    767   %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5 ; dword 1 of the upper 128-bit half
    768   ret <8 x i32> %r2
    769 }
    770 
    771 define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) { ; v4i32: insert a loaded i32 at index 1, then %y at index 3
    772 ; KNL-LABEL: insert_v4i32:
    773 ; KNL:       ## BB#0:
    774 ; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
    775 ; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
    776 ; KNL-NEXT:    retq
    777 ;
    778 ; SKX-LABEL: insert_v4i32:
    779 ; SKX:       ## BB#0:
    780 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
    781 ; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
    782 ; SKX-NEXT:    retq
    783   %val = load i32, i32* %ptr ; the checks fold this load into the first insert
    784   %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
    785   %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3 ; both indices are within the single 128-bit register
    786   ret <4 x i32> %r2
    787 }
    788 
    789 define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) { ; v32i16: insert a loaded i16 at index 1, then %y at index 9
    790 ; KNL-LABEL: insert_v32i16:
    791 ; KNL:       ## BB#0:
    792 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
    793 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
    794 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
    795 ; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
    796 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    797 ; KNL-NEXT:    retq
    798 ;
    799 ; SKX-LABEL: insert_v32i16:
    800 ; SKX:       ## BB#0:
    801 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    802 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    803 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    804 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    805 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    806 ; SKX-NEXT:    retq
    807   %val = load i16, i16* %ptr ; the checks fold this load into the first insert
    808   %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
    809   %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9 ; word 1 of 128-bit chunk 1; the first check block works on ymm registers, the second on zmm
    810   ret <32 x i16> %r2
    811 }
    812 
    813 define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) { ; v16i16: insert a loaded i16 at index 1, then %y at index 9
    814 ; KNL-LABEL: insert_v16i16:
    815 ; KNL:       ## BB#0:
    816 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    817 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    818 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    819 ; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    820 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    821 ; KNL-NEXT:    retq
    822 ;
    823 ; SKX-LABEL: insert_v16i16:
    824 ; SKX:       ## BB#0:
    825 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    826 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    827 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
    828 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    829 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    830 ; SKX-NEXT:    retq
    831   %val = load i16, i16* %ptr ; the checks fold this load into the first insert
    832   %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
    833   %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9 ; word 1 of the upper 128-bit half
    834   ret <16 x i16> %r2
    835 }
    836 
    837 define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
        ; Inserts an i16 loaded from %ptr into element 1 and the scalar %y into
        ; element 5 of a <8 x i16>. Both prefixes expect two vpinsrw
        ; instructions, the first taking its source directly from memory.
    838 ; KNL-LABEL: insert_v8i16:
    839 ; KNL:       ## BB#0:
    840 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
    841 ; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
    842 ; KNL-NEXT:    retq
    843 ;
    844 ; SKX-LABEL: insert_v8i16:
    845 ; SKX:       ## BB#0:
    846 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
    847 ; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
    848 ; SKX-NEXT:    retq
    849   %val = load i16, i16* %ptr
    850   %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
    851   %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
    852   ret <8 x i16> %r2
    853 }
    854 
    855 define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
        ; Inserts an i8 loaded from %ptr into element 1 and the scalar %y into
        ; element 50 of a <64 x i8>. Element 50 is byte 2 of 128-bit lane 3
        ; (50 = 3*16 + 2), so the SKX expectations extract and reinsert lane 3
        ; of %zmm0 around vpinsrb $2. The KNL expectations instead update the
        ; upper 128 bits of %ymm1.
        ; NOTE(review): presumably %ymm1 carries bytes 32-63 of the split
        ; vector on KNL - confirm.
    856 ; KNL-LABEL: insert_v64i8:
    857 ; KNL:       ## BB#0:
    858 ; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
    859 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
    860 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
    861 ; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
    862 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    863 ; KNL-NEXT:    retq
    864 ;
    865 ; SKX-LABEL: insert_v64i8:
    866 ; SKX:       ## BB#0:
    867 ; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
    868 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    869 ; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
    870 ; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
    871 ; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
    872 ; SKX-NEXT:    retq
    873   %val = load i8, i8* %ptr
    874   %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
    875   %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
    876   ret <64 x i8> %r2
    877 }
    878 
    879 define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
        ; Inserts an i8 loaded from %ptr into element 1 and the scalar %y into
        ; element 17 of a <32 x i8>. Element 17 is byte 1 of the upper 128-bit
        ; half (17 = 16 + 1). The two prefixes differ only in the lane
        ; extract/insert mnemonics: vextracti128/vinserti128 for KNL versus
        ; vextracti32x4/vinserti32x4 (on ymm registers) for SKX.
    880 ; KNL-LABEL: insert_v32i8:
    881 ; KNL:       ## BB#0:
    882 ; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
    883 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    884 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    885 ; KNL-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
    886 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    887 ; KNL-NEXT:    retq
    888 ;
    889 ; SKX-LABEL: insert_v32i8:
    890 ; SKX:       ## BB#0:
    891 ; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
    892 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    893 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
    894 ; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
    895 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    896 ; SKX-NEXT:    retq
    897   %val = load i8, i8* %ptr
    898   %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
    899   %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
    900   ret <32 x i8> %r2
    901 }
    902 
    903 define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
        ; Inserts an i8 loaded from %ptr into element 3 and the scalar %y into
        ; element 10 of a <16 x i8>. Both prefixes expect two vpinsrb
        ; instructions, the first taking its source directly from memory.
    904 ; KNL-LABEL: insert_v16i8:
    905 ; KNL:       ## BB#0:
    906 ; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
    907 ; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
    908 ; KNL-NEXT:    retq
    909 ;
    910 ; SKX-LABEL: insert_v16i8:
    911 ; SKX:       ## BB#0:
    912 ; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
    913 ; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
    914 ; SKX-NEXT:    retq
    915   %val = load i8, i8* %ptr
    916   %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
    917   %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
    918   ret <16 x i8> %r2
    919 }
    920 
    921 define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
        ; Inserts the scalar %y into element 1 of a <8 x i64>. The expectations
        ; do the insert on %xmm0 with vpinsrq $1 and then write the result back
        ; as lane 0 of %zmm0. The write-back mnemonic differs per prefix:
        ; vinserti32x4 for KNL, vinserti64x2 for SKX.
    922 ; KNL-LABEL: test_insert_128_v8i64:
    923 ; KNL:       ## BB#0:
    924 ; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
    925 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    926 ; KNL-NEXT:    retq
    927 ;
    928 ; SKX-LABEL: test_insert_128_v8i64:
    929 ; SKX:       ## BB#0:
    930 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
    931 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
    932 ; SKX-NEXT:    retq
    933   %r = insertelement <8 x i64> %x, i64 %y, i32 1
    934   ret <8 x i64> %r
    935 }
    936 
    937 define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
        ; Inserts the scalar %y into element 1 of a <16 x i32>. Both prefixes
        ; expect vpinsrd $1 on %xmm0 followed by vinserti32x4 $0 to write the
        ; result back as lane 0 of %zmm0.
    938 ; KNL-LABEL: test_insert_128_v16i32:
    939 ; KNL:       ## BB#0:
    940 ; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
    941 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    942 ; KNL-NEXT:    retq
    943 ;
    944 ; SKX-LABEL: test_insert_128_v16i32:
    945 ; SKX:       ## BB#0:
    946 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
    947 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    948 ; SKX-NEXT:    retq
    949   %r = insertelement <16 x i32> %x, i32 %y, i32 1
    950   ret <16 x i32> %r
    951 }
    952 
    953 define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
        ; Inserts the scalar %y into element 1 of a <8 x double>. The
        ; expectations build the new low lane with vunpcklpd (xmm0[0], xmm1[0])
        ; and write it back as lane 0 of %zmm0. The write-back mnemonic differs
        ; per prefix: vinsertf32x4 for KNL, vinsertf64x2 for SKX.
    954 ; KNL-LABEL: test_insert_128_v8f64:
    955 ; KNL:       ## BB#0:
    956 ; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
    957 ; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    958 ; KNL-NEXT:    retq
    959 ;
    960 ; SKX-LABEL: test_insert_128_v8f64:
    961 ; SKX:       ## BB#0:
    962 ; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
    963 ; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
    964 ; SKX-NEXT:    retq
    965   %r = insertelement <8 x double> %x, double %y, i32 1
    966   ret <8 x double> %r
    967 }
    968 
    969 define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
        ; Inserts the scalar %y into element 1 of a <16 x float>. Both prefixes
        ; expect vinsertps to place xmm1[0] into slot 1 of the low lane,
        ; followed by vinsertf32x4 $0 to write that lane back into %zmm0.
    970 ; KNL-LABEL: test_insert_128_v16f32:
    971 ; KNL:       ## BB#0:
    972 ; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
    973 ; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    974 ; KNL-NEXT:    retq
    975 ;
    976 ; SKX-LABEL: test_insert_128_v16f32:
    977 ; SKX:       ## BB#0:
    978 ; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
    979 ; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    980 ; SKX-NEXT:    retq
    981   %r = insertelement <16 x float> %x, float %y, i32 1
    982   ret <16 x float> %r
    983 }
    984 
    985 define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
        ; Inserts the scalar %y into element 10 of a <16 x i16>. Element 10 is
        ; word 2 of the upper 128-bit half (10 = 8 + 2), so the expectations
        ; extract that half, apply vpinsrw $2, and reinsert it. The lane
        ; extract/insert mnemonics are vextracti128/vinserti128 for KNL and
        ; vextracti32x4/vinserti32x4 for SKX.
    986 ; KNL-LABEL: test_insert_128_v16i16:
    987 ; KNL:       ## BB#0:
    988 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    989 ; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
    990 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    991 ; KNL-NEXT:    retq
    992 ;
    993 ; SKX-LABEL: test_insert_128_v16i16:
    994 ; SKX:       ## BB#0:
    995 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
    996 ; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
    997 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    998 ; SKX-NEXT:    retq
    999   %r = insertelement <16 x i16> %x, i16 %y, i32 10
   1000   ret <16 x i16> %r
   1001 }
   1002 
   1003 define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
        ; Inserts the scalar %y into element 20 of a <32 x i8>. Element 20 is
        ; byte 4 of the upper 128-bit half (20 = 16 + 4), so the expectations
        ; extract that half, apply vpinsrb $4, and reinsert it. The lane
        ; extract/insert mnemonics are vextracti128/vinserti128 for KNL and
        ; vextracti32x4/vinserti32x4 for SKX.
   1004 ; KNL-LABEL: test_insert_128_v32i8:
   1005 ; KNL:       ## BB#0:
   1006 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1007 ; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
   1008 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1009 ; KNL-NEXT:    retq
   1010 ;
   1011 ; SKX-LABEL: test_insert_128_v32i8:
   1012 ; SKX:       ## BB#0:
   1013 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
   1014 ; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
   1015 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
   1016 ; SKX-NEXT:    retq
   1017   %r = insertelement <32 x i8> %x, i8 %y, i32 20
   1018   ret <32 x i8> %r
   1019 }
   1020