; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL

; Insert the all-ones constant (i64 -1) into lane 0 of <2 x i64>.
; Per the autogenerated checks: pre-SSE4.1 targets load the -1 half from a
; constant pool via movlpd; SSE4.1/AVX1 materialize all-ones with pcmpeqd and
; blend 16-bit words; AVX2/AVX512 blend 32-bit dwords instead.
define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_x1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_x1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_x1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_x1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_x1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_x1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v2i64_x1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 -1, i32 0
  ret <2 x i64> %1
}

; Insert i64 -1 into lane 2 of <4 x i64>.
; On SSE targets the 256-bit vector is split across xmm0/xmm1, so only xmm1
; (lanes 2-3) is modified. AVX1 builds an all-ones ymm via vcmptrueps (no
; 256-bit integer ops); AVX2/AVX512 use vpcmpeqd + a dword blend.
define <4 x i64> @insert_v4i64_01x3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i64_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 -1, i32 2
  ret <4 x i64> %1
}

; Insert i32 -1 into lane 2 of <4 x i32>.
; Pre-SSE4.1 has no lane blend, so -1 is materialized in a GPR (movl $-1),
; moved to xmm, and shuffled into place with two shufps. SSE4.1/AVX1 blend
; words; AVX2/AVX512 blend a single dword lane.
define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i32_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 -1, i32 2
  ret <4 x i32> %1
}

; Insert i32 -1 into lanes 0 and 6 of <8 x i32> (two insertelements).
; SSE targets handle the two 128-bit halves separately (movss for lane 0 of
; xmm0, GPR+shufps for lane 2 of xmm1); SSE4.1 uses two pblendw; AVX targets
; fold both inserts into a single 256-bit blend against an all-ones vector.
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_x12345x7:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_x12345x7:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v8i32_x12345x7:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 6
  ret <8 x i32> %2
}

; Insert i16 -1 into lanes 0 and 6 of <8 x i16>.
; Pre-SSE4.1 uses two pinsrw from a GPR holding 0xFFFF; SSE4.1 and all AVX
; targets fold both inserts into one pblendw against an all-ones register
; (a single AVX check covers AVX1/AVX2/AVX512 since the code is identical).
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 -1, i32 0
  %2 = insertelement <8 x i16> %1, i16 -1, i32 6
  ret <8 x i16> %2
}

; Insert i16 -1 into lanes 0, 6, and 15 of <16 x i16>.
; SSE targets use three pinsrw across the two halves; SSE4.1 uses two pblendw.
; AVX1 (no 256-bit vpblendw) lowers to three and/or pairs with constant-pool
; masks; AVX2/AVX512F use three vpblendvb with byte-mask constants; AVX512VL
; uses k-register masked vmovdqu16 moves instead of blends.
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    movw $1, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $64, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $-32768, %ax # imm = 0x8000
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 -1, i32 0
  %2 = insertelement <16 x i16> %1, i16 -1, i32 6
  %3 = insertelement <16 x i16> %2, i16 -1, i32 15
  ret <16 x i16> %3
}

; Insert i8 -1 (0xFF) into lanes 0 and 15 of <16 x i8>.
; SSE2/SSE3 lack byte inserts/shuffles, so they mask-and-merge with
; pand/pandn/por plus pslldq; SSSE3 uses pshufb to zero the target byte then
; ORs in the 0xFF; SSE4.1 and AVX use two direct pinsrb instructions.
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $255, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 -1, i32 0
  %2 = insertelement <16 x i8> %1, i8 -1, i32 15
  ret <16 x i8> %2
}

; Insert i8 -1 into lanes 0, 15, 30, and 31 of <32 x i8>.
; SSE2/SSE3 mask-and-merge each 128-bit half with pand/pandn/por and pslldq;
; SSSE3 uses pshufb-based zero-and-or; SSE4.1 uses four pinsrb (two per half).
; AVX targets extract the high xmm, pinsrb into each half, and recombine with
; a 128-bit insert; a ymm blend keeps the untouched lanes of the low half.
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    por %xmm4, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    por %xmm4, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl $255, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $255, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $255, %eax
; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 -1, i32 0
  %2 = insertelement <32 x i8> %1, i8 -1, i32 15
  %3 = insertelement <32 x i8> %2, i8 -1, i32 30
  %4 = insertelement <32 x i8> %3, i8 -1, i32 31
  ret <32 x i8> %4
}
    505