; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;

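; Clear the upper 32 bits of each i64 element: extract, trunc to i32, zext back to i64, reinsert.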
define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64a:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper2xi64a:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0,   i64 %ext1, i32 1
  ret <2 x i64> %v1
}

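; Clear the upper 16 bits of each i32 element: extract, trunc to i16, zext back to i32, reinsert.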
define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32a:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[3,1,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi32a:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi32a:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0,   i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1,   i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2,   i32 %ext3, i32 3
  ret <4 x i32> %v3
}

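; Clear the upper 8 bits of each i16 element: extract, trunc to i8, zext back to i16, reinsert.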
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE:       # BB#0:
; SSE-NEXT:    pextrw $1, %xmm0, %eax
; SSE-NEXT:    pextrw $2, %xmm0, %r9d
; SSE-NEXT:    pextrw $3, %xmm0, %edx
; SSE-NEXT:    pextrw $4, %xmm0, %r8d
; SSE-NEXT:    pextrw $5, %xmm0, %edi
; SSE-NEXT:    pextrw $6, %xmm0, %esi
; SSE-NEXT:    pextrw $7, %xmm0, %ecx
; SSE-NEXT:    movd %ecx, %xmm1
; SSE-NEXT:    movd %edx, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    movd %edi, %xmm1
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    movd %r9d, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    movd %r8d, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrw $1, %xmm0, %eax
; AVX-NEXT:    vpextrw $2, %xmm0, %ecx
; AVX-NEXT:    vpextrw $3, %xmm0, %edx
; AVX-NEXT:    vpextrw $4, %xmm0, %esi
; AVX-NEXT:    vpextrw $5, %xmm0, %edi
; AVX-NEXT:    vpextrw $6, %xmm0, %r8d
; AVX-NEXT:    vpextrw $7, %xmm0, %r9d
; AVX-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $3, %edx, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $4, %esi, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $6, %r8d, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $7, %r9d, %xmm0, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0,   i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1,   i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2,   i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3,   i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4,   i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5,   i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6,   i16 %ext7, i32 7
  ret <8 x i16> %v7
}

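; Clear the upper 4 bits of each i8 element: extract, trunc to i4, zext back to i8, reinsert.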
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movd %esi, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT:    movd %ecx, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT:    movd %edx, %xmm3
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE-NEXT:    movd %r9d, %xmm0
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movd %r8d, %xmm0
; SSE-NEXT:    movd %ecx, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:    vpextrb $1, %xmm0, %eax
; AVX-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $3, %xmm0, %eax
; AVX-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $5, %xmm0, %eax
; AVX-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $7, %xmm0, %eax
; AVX-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $9, %xmm0, %eax
; AVX-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $11, %xmm0, %eax
; AVX-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $13, %xmm0, %eax
; AVX-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpextrb $15, %xmm0, %eax
; AVX-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm0
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0  = extractelement <16 x i8> %0, i32 0
  %x1  = extractelement <16 x i8> %0, i32 1
  %x2  = extractelement <16 x i8> %0, i32 2
  %x3  = extractelement <16 x i8> %0, i32 3
  %x4  = extractelement <16 x i8> %0, i32 4
  %x5  = extractelement <16 x i8> %0, i32 5
  %x6  = extractelement <16 x i8> %0, i32 6
  %x7  = extractelement <16 x i8> %0, i32 7
  %x8  = extractelement <16 x i8> %0, i32 8
  %x9  = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0  = trunc i8 %x0  to i4
  %trunc1  = trunc i8 %x1  to i4
  %trunc2  = trunc i8 %x2  to i4
  %trunc3  = trunc i8 %x3  to i4
  %trunc4  = trunc i8 %x4  to i4
  %trunc5  = trunc i8 %x5  to i4
  %trunc6  = trunc i8 %x6  to i4
  %trunc7  = trunc i8 %x7  to i4
  %trunc8  = trunc i8 %x8  to i4
  %trunc9  = trunc i8 %x9  to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0  = zext i4 %trunc0  to i8
  %ext1  = zext i4 %trunc1  to i8
  %ext2  = zext i4 %trunc2  to i8
  %ext3  = zext i4 %trunc3  to i8
  %ext4  = zext i4 %trunc4  to i8
  %ext5  = zext i4 %trunc5  to i8
  %ext6  = zext i4 %trunc6  to i8
  %ext7  = zext i4 %trunc7  to i8
  %ext8  = zext i4 %trunc8  to i8
  %ext9  = zext i4 %trunc9  to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0  = insertelement <16 x i8> undef, i8 %ext0,  i32 0
  %v1  = insertelement <16 x i8> %v0,   i8 %ext1,  i32 1
  %v2  = insertelement <16 x i8> %v1,   i8 %ext2,  i32 2
  %v3  = insertelement <16 x i8> %v2,   i8 %ext3,  i32 3
  %v4  = insertelement <16 x i8> %v3,   i8 %ext4,  i32 4
  %v5  = insertelement <16 x i8> %v4,   i8 %ext5,  i32 5
  %v6  = insertelement <16 x i8> %v5,   i8 %ext6,  i32 6
  %v7  = insertelement <16 x i8> %v6,   i8 %ext7,  i32 7
  %v8  = insertelement <16 x i8> %v7,   i8 %ext8,  i32 8
  %v9  = insertelement <16 x i8> %v8,   i8 %ext9,  i32 9
  %v10 = insertelement <16 x i8> %v9,   i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10,  i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11,  i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12,  i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13,  i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14,  i8 %ext15, i32 15
  ret <16 x i8> %v15
}

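; Clear the upper 32 bits of each i64 element by bitcasting to <4 x i32> and inserting zero into the odd (upper) lanes.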
define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64b:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0,  i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

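; Clear the upper 16 bits of each i32 element by bitcasting to <8 x i16> and inserting zero into the odd (upper) lanes.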
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    pinsrw $1, %eax, %xmm0
; SSE-NEXT:    pinsrw $3, %eax, %xmm0
; SSE-NEXT:    pinsrw $5, %eax, %xmm0
; SSE-NEXT:    pinsrw $7, %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0,  i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1,  i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2,  i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

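; Clear the upper 8 bits of each i16 element by bitcasting to <16 x i8> and inserting zero into the odd (upper) lanes.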
define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psllw $8, %xmm3
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    pslld $24, %xmm3
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psllq $40, %xmm3
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    psllq $56, %xmm3
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4]
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm3
; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2]
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

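; Clear the upper 4 bits of each i8 element by bitcasting to <32 x i4> and inserting zero into the odd (upper) lanes.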
define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8b:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    movd %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %r8
; SSE-NEXT:    movq %rcx, %r9
; SSE-NEXT:    movq %rcx, %r10
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movd %xmm1, %rcx
; SSE-NEXT:    shrq $56, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r11
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r14
; SSE-NEXT:    shrq $40, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    shrq $24, %r10
; SSE-NEXT:    andb $15, %r10b
; SSE-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    shrq $16, %r9
; SSE-NEXT:    andb $15, %r9b
; SSE-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    andb $15, %r8b
; SSE-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rbx
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $56, %rbx
; SSE-NEXT:    andb $15, %bl
; SSE-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $40, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $24, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $16, %r14
; SSE-NEXT:    andb $15, %r14b
; SSE-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $8, %r11
; SSE-NEXT:    andb $15, %r11b
; SSE-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX:       # BB#0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %r15
; AVX-NEXT:    pushq %r14
; AVX-NEXT:    pushq %r13
; AVX-NEXT:    pushq %r12
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movq %rcx, %r8
; AVX-NEXT:    movq %rcx, %r9
; AVX-NEXT:    movq %rcx, %r10
; AVX-NEXT:    movq %rcx, %r11
; AVX-NEXT:    movq %rcx, %r14
; AVX-NEXT:    movq %rcx, %r15
; AVX-NEXT:    movq %rdx, %r12
; AVX-NEXT:    movq %rdx, %r13
; AVX-NEXT:    movq %rdx, %rdi
; AVX-NEXT:    movq %rdx, %rax
; AVX-NEXT:    movq %rdx, %rsi
; AVX-NEXT:    movq %rdx, %rbx
; AVX-NEXT:    movq %rdx, %rbp
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    andb $15, %cl
; AVX-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rbp
; AVX-NEXT:    andb $15, %bpl
; AVX-NEXT:    movb %bpl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %rbx
; AVX-NEXT:    andb $15, %bl
; AVX-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %rsi
; AVX-NEXT:    andb $15, %sil
; AVX-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %rax
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %rdi
; AVX-NEXT:    andb $15, %dil
; AVX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r13
; AVX-NEXT:    andb $15, %r13b
; AVX-NEXT:    movb %r13b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r12
; AVX-NEXT:    andb $15, %r12b
; AVX-NEXT:    movb %r12b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rdx
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %r15
; AVX-NEXT:    andb $15, %r15b
; AVX-NEXT:    movb %r15b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %r14
; AVX-NEXT:    andb $15, %r14b
; AVX-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %r11
; AVX-NEXT:    andb $15, %r11b
; AVX-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %r10
; AVX-NEXT:    andb $15, %r10b
; AVX-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r9
; AVX-NEXT:    andb $15, %r9b
; AVX-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r8
; AVX-NEXT:    andb $15, %r8b
; AVX-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %r12
; AVX-NEXT:    popq %r13
; AVX-NEXT:    popq %r14
; AVX-NEXT:    popq %r15
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x4  = bitcast <16 x i8> %0 to <32 x i4>
  %r0  = insertelement <32 x i4> %x4,  i4 zeroinitializer, i32 1
  %r1  = insertelement <32 x i4> %r0,  i4 zeroinitializer, i32 3
  %r2  = insertelement <32 x i4> %r1,  i4 zeroinitializer, i32 5
  %r3  = insertelement <32 x i4> %r2,  i4 zeroinitializer, i32 7
  %r4  = insertelement <32 x i4> %r3,  i4 zeroinitializer, i32 9
  %r5  = insertelement <32 x i4> %r4,  i4 zeroinitializer, i32 11
  %r6  = insertelement <32 x i4> %r5,  i4 zeroinitializer, i32 13
  %r7  = insertelement <32 x i4> %r6,  i4 zeroinitializer, i32 15
  %r8  = insertelement <32 x i4> %r7,  i4 zeroinitializer, i32 17
  %r9  = insertelement <32 x i4> %r8,  i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9,  i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
  ret <16 x i8> %r
}

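; Clear the upper 32 bits of each i64 element with a direct 'and' against a splat of 4294967295 (0x00000000FFFFFFFF).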
define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

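; Clear the upper 16 bits of each i32 element with a direct 'and' against a splat of 65535.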
define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}

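; Clear the upper 8 bits of each i16 element with a direct 'and' against a splat of 255.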
define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}

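; Clear the upper 4 bits of each i8 element with a direct 'and' against a splat of 15.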
define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}