; NOTE: this listing was captured from a source-browser page (directory: X86); the page's navigation bar and line-number gutter are residue, not part of the test.
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s

      4 ;CHECK-LABEL: test1:
      5 ;CHECK: vinsertps
      6 ;CHECK: vinsertf32x4
      7 ;CHECK: ret
      8 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
      9   %rrr = load float, float* %br
     10   %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
     11   %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
     12   ret <16 x float> %rrr3
     13 }
     14 
     15 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
     16 ; KNL-LABEL: test2:
     17 ; KNL:       ## BB#0:
     18 ; KNL-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
     19 ; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
     20 ; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
     21 ; KNL-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
     22 ; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
     23 ; KNL-NEXT:    retq
     24 ;
     25 ; SKX-LABEL: test2:
     26 ; SKX:       ## BB#0:
     27 ; SKX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
     28 ; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
     29 ; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
     30 ; SKX-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
     31 ; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
     32 ; SKX-NEXT:    retq
     33   %rrr = load double, double* %br
     34   %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
     35   %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
     36   ret <8 x double> %rrr3
     37 }
     38 
     39 ;CHECK-LABEL: test3:
     40 ;CHECK: vextractf32x4 $1
     41 ;CHECK: vinsertf32x4 $0
     42 ;CHECK: ret
     43 define <16 x float> @test3(<16 x float> %x) nounwind {
     44   %eee = extractelement <16 x float> %x, i32 4
     45   %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
     46   ret <16 x float> %rrr2
     47 }
     48 
     49 define <8 x i64> @test4(<8 x i64> %x) nounwind {
     50 ; KNL-LABEL: test4:
     51 ; KNL:       ## BB#0:
     52 ; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
     53 ; KNL-NEXT:    vmovq %xmm1, %rax
     54 ; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
     55 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
     56 ; KNL-NEXT:    retq
     57 ;
     58 ; SKX-LABEL: test4:
     59 ; SKX:       ## BB#0:
     60 ; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
     61 ; SKX-NEXT:    vmovq %xmm1, %rax
     62 ; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
     63 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
     64 ; SKX-NEXT:    retq
     65   %eee = extractelement <8 x i64> %x, i32 4
     66   %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
     67   ret <8 x i64> %rrr2
     68 }
     69 
     70 ;CHECK-LABEL: test5:
     71 ;CHECK: vextractps
     72 ;CHECK: ret
     73 define i32 @test5(<4 x float> %x) nounwind {
     74   %ef = extractelement <4 x float> %x, i32 3
     75   %ei = bitcast float %ef to i32
     76   ret i32 %ei
     77 }
     78 
     79 ;CHECK-LABEL: test6:
     80 ;CHECK: vextractps {{.*}}, (%rdi)
     81 ;CHECK: ret
     82 define void @test6(<4 x float> %x, float* %out) nounwind {
     83   %ef = extractelement <4 x float> %x, i32 3
     84   store float %ef, float* %out, align 4
     85   ret void
     86 }
     87 
     88 ;CHECK-LABEL: test7
     89 ;CHECK: vmovd
     90 ;CHECK: vpermps %zmm
     91 ;CHECK: ret
     92 define float @test7(<16 x float> %x, i32 %ind) nounwind {
     93   %e = extractelement <16 x float> %x, i32 %ind
     94   ret float %e
     95 }
     96 
     97 ;CHECK-LABEL: test8
     98 ;CHECK: vmovq
     99 ;CHECK: vpermpd %zmm
    100 ;CHECK: ret
    101 define double @test8(<8 x double> %x, i32 %ind) nounwind {
    102   %e = extractelement <8 x double> %x, i32 %ind
    103   ret double %e
    104 }
    105 
    106 ;CHECK-LABEL: test9
    107 ;CHECK: vmovd
    108 ;CHECK: vpermps %ymm
    109 ;CHECK: ret
    110 define float @test9(<8 x float> %x, i32 %ind) nounwind {
    111   %e = extractelement <8 x float> %x, i32 %ind
    112   ret float %e
    113 }
    114 
    115 ;CHECK-LABEL: test10
    116 ;CHECK: vmovd
    117 ;CHECK: vpermd %zmm
    118 ;CHECK: vmovd  %xmm0, %eax
    119 ;CHECK: ret
    120 define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
    121   %e = extractelement <16 x i32> %x, i32 %ind
    122   ret i32 %e
    123 }
    124 
    125 ;CHECK-LABEL: test11
    126 ;CHECK: vpcmpltud
    127 ;CHECK: kshiftlw $11
    128 ;CHECK: kshiftrw $15
    129 ;CHECK: testb
    130 ;CHECK: je
    131 ;CHECK: ret
    132 ;CHECK: ret
    133 define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
    134   %cmp_res = icmp ult <16 x i32> %a, %b
    135   %ia = extractelement <16 x i1> %cmp_res, i32 4
    136   br i1 %ia, label %A, label %B
    137   A:
    138     ret <16 x i32>%b
    139   B:
    140    %c = add <16 x i32>%b, %a
    141    ret <16 x i32>%c
    142 }
    143 
    144 ;CHECK-LABEL: test12
    145 ;CHECK: vpcmpgtq
    146 ;CHECK: kshiftlw $15
    147 ;CHECK: kshiftrw $15
    148 ;CHECK: testb
    149 ;CHECK: ret
    150 
    151 define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
    152 
    153   %cmpvector_func.i = icmp slt <16 x i64> %a, %b
    154   %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
    155   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
    156   ret i64 %res
    157 }
    158 
    159 ;CHECK-LABEL: test13
    160 ;CHECK: cmpl    %esi, %edi
    161 ;CHECK: setb    %al
    162 ;CHECK: andl    $1, %eax
    163 ;CHECK: kmovw   %eax, %k0
    164 ;CHECK: movw    $-4
    165 ;CHECK: korw
    166 define i16 @test13(i32 %a, i32 %b) {
    167   %cmp_res = icmp ult i32 %a, %b
    168   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
    169   %res = bitcast <16 x i1> %maskv to i16
    170   ret i16 %res
    171 }
    172 
    173 ;CHECK-LABEL: test14
    174 ;CHECK: vpcmpgtq
    175 ;KNL: kshiftlw $11
    176 ;KNL: kshiftrw $15
    177 ;KNL: testb
    178 ;SKX: kshiftlb $3
    179 ;SKX: kshiftrb $7
    180 ;SKX: testb
    181 ;CHECK: ret
    182 
    183 define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
    184 
    185   %cmpvector_func.i = icmp slt <8 x i64> %a, %b
    186   %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
    187   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
    188   ret i64 %res
    189 }
    190 
    191 ;CHECK-LABEL: test15
    192 ;CHECK: movb (%rdi), %al
    193 ;CHECK: andb $1, %al
    194 ;CHECK: movw    $-1, %ax
    195 ;CHECK: cmovew
    196 define i16 @test15(i1 *%addr) {
    197   %x = load i1 , i1 * %addr, align 1
    198   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
    199   %x2 = bitcast <16 x i1>%x1 to i16
    200   ret i16 %x2
    201 }
    202 
    203 ;CHECK-LABEL: test16
    204 ;CHECK: movb (%rdi), %al
    205 ;CHECK: andw $1, %ax
    206 ;CHECK: kmovw
    207 ;CHECK: kshiftlw        $10
    208 ;CHECK: korw
    209 ;CHECK: ret
    210 define i16 @test16(i1 *%addr, i16 %a) {
    211   %x = load i1 , i1 * %addr, align 128
    212   %a1 = bitcast i16 %a to <16 x i1>
    213   %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
    214   %x2 = bitcast <16 x i1>%x1 to i16
    215   ret i16 %x2
    216 }
    217 
    218 ;CHECK-LABEL: test17
    219 ;KNL: movb (%rdi), %al
    220 ;KNL: andw $1, %ax
    221 ;KNL: kshiftlw $4
    222 ;KNL: korw
    223 ;SKX: kshiftlb $4
    224 ;SKX: korb
    225 ;CHECK: ret
    226 define i8 @test17(i1 *%addr, i8 %a) {
    227   %x = load i1 , i1 * %addr, align 128
    228   %a1 = bitcast i8 %a to <8 x i1>
    229   %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
    230   %x2 = bitcast <8 x i1>%x1 to i8
    231   ret i8 %x2
    232 }
    233 
    234 define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
    235 ; SKX-LABEL: extract_v8i64:
    236 ; SKX:       ## BB#0:
    237 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax
    238 ; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
    239 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    240 ; SKX-NEXT:    retq
    241   %r1 = extractelement <8 x i64> %x, i32 1
    242   %r2 = extractelement <8 x i64> %x, i32 3
    243   store i64 %r2, i64* %dst, align 1
    244   ret i64 %r1
    245 }
    246 
    247 define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
    248 ; SKX-LABEL: extract_v4i64:
    249 ; SKX:       ## BB#0:
    250 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax
    251 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
    252 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    253 ; SKX-NEXT:    retq
    254   %r1 = extractelement <4 x i64> %x, i32 1
    255   %r2 = extractelement <4 x i64> %x, i32 3
    256   store i64 %r2, i64* %dst, align 1
    257   ret i64 %r1
    258 }
    259 
    260 define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
    261 ; SKX-LABEL: extract_v2i64:
    262 ; SKX:       ## BB#0:
    263 ; SKX-NEXT:    vmovq %xmm0, %rax
    264 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
    265 ; SKX-NEXT:    retq
    266   %r1 = extractelement <2 x i64> %x, i32 0
    267   %r2 = extractelement <2 x i64> %x, i32 1
    268   store i64 %r2, i64* %dst, align 1
    269   ret i64 %r1
    270 }
    271 
    272 define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
    273 ; SKX-LABEL: extract_v16i32:
    274 ; SKX:       ## BB#0:
    275 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    276 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    277 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
    278 ; SKX-NEXT:    retq
    279   %r1 = extractelement <16 x i32> %x, i32 1
    280   %r2 = extractelement <16 x i32> %x, i32 5
    281   store i32 %r2, i32* %dst, align 1
    282   ret i32 %r1
    283 }
    284 
    285 define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
    286 ; SKX-LABEL: extract_v8i32:
    287 ; SKX:       ## BB#0:
    288 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    289 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
    290 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
    291 ; SKX-NEXT:    retq
    292   %r1 = extractelement <8 x i32> %x, i32 1
    293   %r2 = extractelement <8 x i32> %x, i32 5
    294   store i32 %r2, i32* %dst, align 1
    295   ret i32 %r1
    296 }
    297 
    298 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
    299 ; SKX-LABEL: extract_v4i32:
    300 ; SKX:       ## BB#0:
    301 ; SKX-NEXT:    vpextrd $1, %xmm0, %eax
    302 ; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
    303 ; SKX-NEXT:    retq
    304   %r1 = extractelement <4 x i32> %x, i32 1
    305   %r2 = extractelement <4 x i32> %x, i32 3
    306   store i32 %r2, i32* %dst, align 1
    307   ret i32 %r1
    308 }
    309 
    310 define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
    311 ; SKX-LABEL: extract_v32i16:
    312 ; SKX:       ## BB#0:
    313 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    314 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    315 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
    316 ; SKX-NEXT:    retq
    317   %r1 = extractelement <32 x i16> %x, i32 1
    318   %r2 = extractelement <32 x i16> %x, i32 9
    319   store i16 %r2, i16* %dst, align 1
    320   ret i16 %r1
    321 }
    322 
    323 define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
    324 ; SKX-LABEL: extract_v16i16:
    325 ; SKX:       ## BB#0:
    326 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    327 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
    328 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
    329 ; SKX-NEXT:    retq
    330   %r1 = extractelement <16 x i16> %x, i32 1
    331   %r2 = extractelement <16 x i16> %x, i32 9
    332   store i16 %r2, i16* %dst, align 1
    333   ret i16 %r1
    334 }
    335 
    336 define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
    337 ; SKX-LABEL: extract_v8i16:
    338 ; SKX:       ## BB#0:
    339 ; SKX-NEXT:    vpextrw $1, %xmm0, %eax
    340 ; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
    341 ; SKX-NEXT:    retq
    342   %r1 = extractelement <8 x i16> %x, i32 1
    343   %r2 = extractelement <8 x i16> %x, i32 3
    344   store i16 %r2, i16* %dst, align 1
    345   ret i16 %r1
    346 }
    347 
    348 define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
    349 ; SKX-LABEL: extract_v64i8:
    350 ; SKX:       ## BB#0:
    351 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    352 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
    353 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
    354 ; SKX-NEXT:    retq
    355   %r1 = extractelement <64 x i8> %x, i32 1
    356   %r2 = extractelement <64 x i8> %x, i32 17
    357   store i8 %r2, i8* %dst, align 1
    358   ret i8 %r1
    359 }
    360 
    361 define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
    362 ; SKX-LABEL: extract_v32i8:
    363 ; SKX:       ## BB#0:
    364 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    365 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
    366 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
    367 ; SKX-NEXT:    retq
    368   %r1 = extractelement <32 x i8> %x, i32 1
    369   %r2 = extractelement <32 x i8> %x, i32 17
    370   store i8 %r2, i8* %dst, align 1
    371   ret i8 %r1
    372 }
    373 
    374 define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
    375 ; SKX-LABEL: extract_v16i8:
    376 ; SKX:       ## BB#0:
    377 ; SKX-NEXT:    vpextrb $1, %xmm0, %eax
    378 ; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
    379 ; SKX-NEXT:    retq
    380   %r1 = extractelement <16 x i8> %x, i32 1
    381   %r2 = extractelement <16 x i8> %x, i32 3
    382   store i8 %r2, i8* %dst, align 1
    383   ret i8 %r1
    384 }
    385 
    386 define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
    387 ; SKX-LABEL: insert_v8i64:
    388 ; SKX:       ## BB#0:
    389 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    390 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
    391 ; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
    392 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    393 ; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
    394 ; SKX-NEXT:    retq
    395   %val = load i64, i64* %ptr
    396   %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
    397   %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
    398   ret <8 x i64> %r2
    399 }
    400 
    401 define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
    402 ; SKX-LABEL: insert_v4i64:
    403 ; SKX:       ## BB#0:
    404 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
    405 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    406 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    407 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
    408 ; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
    409 ; SKX-NEXT:    retq
    410   %val = load i64, i64* %ptr
    411   %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
    412   %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
    413   ret <4 x i64> %r2
    414 }
    415 
    416 define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
    417 ; SKX-LABEL: insert_v2i64:
    418 ; SKX:       ## BB#0:
    419 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm0
    420 ; SKX-NEXT:    vpinsrq $3, %rdi, %xmm0, %xmm0
    421 ; SKX-NEXT:    retq
    422   %val = load i64, i64* %ptr
    423   %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
    424   %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
    425   ret <2 x i64> %r2
    426 }
    427 
    428 define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
    429 ; SKX-LABEL: insert_v16i32:
    430 ; SKX:       ## BB#0:
    431 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    432 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    433 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    434 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    435 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    436 ; SKX-NEXT:    retq
    437   %val = load i32, i32* %ptr
    438   %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
    439   %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
    440   ret <16 x i32> %r2
    441 }
    442 
    443 define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
    444 ; KNL-LABEL: insert_v8i32:
    445 ; KNL:       ## BB#0:
    446 ; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    447 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    448 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    449 ; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    450 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    451 ; KNL-NEXT:    retq
    452 ;
    453 ; SKX-LABEL: insert_v8i32:
    454 ; SKX:       ## BB#0:
    455 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
    456 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    457 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    458 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    459 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    460 ; SKX-NEXT:    retq
    461   %val = load i32, i32* %ptr
    462   %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
    463   %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
    464   ret <8 x i32> %r2
    465 }
    466 
    467 define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
    468 ; KNL-LABEL: insert_v4i32:
    469 ; KNL:       ## BB#0:
    470 ; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
    471 ; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
    472 ; KNL-NEXT:    retq
    473 ;
    474 ; SKX-LABEL: insert_v4i32:
    475 ; SKX:       ## BB#0:
    476 ; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
    477 ; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
    478 ; SKX-NEXT:    retq
    479   %val = load i32, i32* %ptr
    480   %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
    481   %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
    482   ret <4 x i32> %r2
    483 }
    484 
    485 define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
    486 ; KNL-LABEL: insert_v32i16:
    487 ; KNL:       ## BB#0:
    488 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
    489 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
    490 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
    491 ; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
    492 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    493 ; KNL-NEXT:    retq
    494 ;
    495 ; SKX-LABEL: insert_v32i16:
    496 ; SKX:       ## BB#0:
    497 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    498 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    499 ; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
    500 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    501 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
    502 ; SKX-NEXT:    retq
    503   %val = load i16, i16* %ptr
    504   %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
    505   %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
    506   ret <32 x i16> %r2
    507 }
    508 
    509 define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
    510 ; KNL-LABEL: insert_v16i16:
    511 ; KNL:       ## BB#0:
    512 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    513 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    514 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    515 ; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    516 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    517 ; KNL-NEXT:    retq
    518 ;
    519 ; SKX-LABEL: insert_v16i16:
    520 ; SKX:       ## BB#0:
    521 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
    522 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    523 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    524 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
    525 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    526 ; SKX-NEXT:    retq
    527   %val = load i16, i16* %ptr
    528   %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
    529   %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
    530   ret <16 x i16> %r2
    531 }
    532 
    533 define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
    534 ; KNL-LABEL: insert_v8i16:
    535 ; KNL:       ## BB#0:
    536 ; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
    537 ; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
    538 ; KNL-NEXT:    retq
    539 ;
    540 ; SKX-LABEL: insert_v8i16:
    541 ; SKX:       ## BB#0:
    542 ; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
    543 ; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
    544 ; SKX-NEXT:    retq
    545   %val = load i16, i16* %ptr
    546   %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
    547   %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
    548   ret <8 x i16> %r2
    549 }
    550 
    551 define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
    552 ; KNL-LABEL: insert_v64i8:
    553 ; KNL:       ## BB#0:
    554 ; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
    555 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
    556 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
    557 ; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
    558 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    559 ; KNL-NEXT:    retq
    560 ;
    561 ; SKX-LABEL: insert_v64i8:
    562 ; SKX:       ## BB#0:
    563 ; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
    564 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    565 ; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
    566 ; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
    567 ; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
    568 ; SKX-NEXT:    retq
    569   %val = load i8, i8* %ptr
    570   %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
    571   %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
    572   ret <64 x i8> %r2
    573 }
    574 
    575 define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
    576 ; SKX-LABEL: insert_v32i8:
    577 ; SKX:       ## BB#0:
    578 ; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
    579 ; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    580 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    581 ; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
    582 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    583 ; SKX-NEXT:    retq
    584   %val = load i8, i8* %ptr
    585   %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
    586   %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
    587   ret <32 x i8> %r2
    588 }
    589 
    590 define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
    591 ; KNL-LABEL: insert_v16i8:
    592 ; KNL:       ## BB#0:
    593 ; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
    594 ; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
    595 ; KNL-NEXT:    retq
    596 ;
    597 ; SKX-LABEL: insert_v16i8:
    598 ; SKX:       ## BB#0:
    599 ; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
    600 ; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
    601 ; SKX-NEXT:    retq
    602   %val = load i8, i8* %ptr
    603   %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
    604   %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
    605   ret <16 x i8> %r2
    606 }
    607 
    608 define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
    609 ; KNL-LABEL: test_insert_128_v8i64:
    610 ; KNL:       ## BB#0:
    611 ; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
    612 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    613 ; KNL-NEXT:    retq
    614 ;
    615 ; SKX-LABEL: test_insert_128_v8i64:
    616 ; SKX:       ## BB#0:
    617 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
    618 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
    619 ; SKX-NEXT:    retq
    620   %r = insertelement <8 x i64> %x, i64 %y, i32 1
    621   ret <8 x i64> %r
    622 }
    623 
    624 define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
    625 ; KNL-LABEL: test_insert_128_v16i32:
    626 ; KNL:       ## BB#0:
    627 ; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
    628 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    629 ; KNL-NEXT:    retq
    630 ;
    631 ; SKX-LABEL: test_insert_128_v16i32:
    632 ; SKX:       ## BB#0:
    633 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
    634 ; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
    635 ; SKX-NEXT:    retq
    636   %r = insertelement <16 x i32> %x, i32 %y, i32 1
    637   ret <16 x i32> %r
    638 }
    639 
    640 define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
    641 ; KNL-LABEL: test_insert_128_v8f64:
    642 ; KNL:       ## BB#0:
    643 ; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
    644 ; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    645 ; KNL-NEXT:    retq
    646 ;
    647 ; SKX-LABEL: test_insert_128_v8f64:
    648 ; SKX:       ## BB#0:
    649 ; SKX-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm1
    650 ; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
    651 ; SKX-NEXT:    retq
    652   %r = insertelement <8 x double> %x, double %y, i32 1
    653   ret <8 x double> %r
    654 }
    655 
    656 define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
    657 ; KNL-LABEL: test_insert_128_v16f32:
    658 ; KNL:       ## BB#0:
    659 ; KNL-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
    660 ; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    661 ; KNL-NEXT:    retq
    662 ;
    663 ; SKX-LABEL: test_insert_128_v16f32:
    664 ; SKX:       ## BB#0:
    665 ; SKX-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
    666 ; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
    667 ; SKX-NEXT:    retq
    668   %r = insertelement <16 x float> %x, float %y, i32 1
    669   ret <16 x float> %r
    670 }
    671 
    672 define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
    673 ; KNL-LABEL: test_insert_128_v16i16:
    674 ; KNL:       ## BB#0:
    675 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    676 ; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
    677 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    678 ; KNL-NEXT:    retq
    679 ;
    680 ; SKX-LABEL: test_insert_128_v16i16:
    681 ; SKX:       ## BB#0:
    682 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    683 ; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
    684 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    685 ; SKX-NEXT:    retq
    686   %r = insertelement <16 x i16> %x, i16 %y, i32 10
    687   ret <16 x i16> %r
    688 }
    689 
    690 define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
    691 ; KNL-LABEL: test_insert_128_v32i8:
    692 ; KNL:       ## BB#0:
    693 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    694 ; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
    695 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    696 ; KNL-NEXT:    retq
    697 ;
    698 ; SKX-LABEL: test_insert_128_v32i8:
    699 ; SKX:       ## BB#0:
    700 ; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
    701 ; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
    702 ; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
    703 ; SKX-NEXT:    retq
    704   %r = insertelement <32 x i8> %x, i8 %y, i32 20
    705   ret <32 x i8> %r
    706 }
    707