; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
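
; These tests cover inserting constant-zero elements into floating-point and
; integer vectors of various widths, and the zero-register move/blend lowering
; (movsd/blendps/pblendw/pblendd/pinsr*) chosen at each SSE/AVX feature level.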

define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # BB#0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorpd %xmm2, %xmm2
; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}

define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}

define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01z3:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01z3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_z12345z7:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_z12345z7:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}

define <16 x i16> @insert_v16i16_z12345z789ABZDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_z12345z789ABZDEz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

define <16 x i8> @insert_v16i8_z123456789ABZDEz(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_z123456789ABZDEz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_z123456789ABZDEz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_z123456789ABZDEz:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm4, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # BB#0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    xorl %eax, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    pandn %xmm4, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    por %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    xorl %eax, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pshufb {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX1:       # BB#0:
; AVX1-NEXT:    xorl %eax, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX2:       # BB#0:
; AVX2-NEXT:    xorl %eax, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}