; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

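; Each function below truncates a wide integer vector and checks the lowering
; produced for the SSE2, SSSE3, SSE4.1, AVX, AVX2 and AVX-512BW configurations
; named in the RUN lines above.
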
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

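; The void tests below store the truncated vector through an undef pointer, so
; the store address that appears in the checks (e.g. (%rax)) is arbitrary; the
; interesting part is the truncation sequence itself.
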
define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i64_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movq %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i32_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pextrw $4, %xmm1, %eax
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm3, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT:    pextrw $4, %xmm2, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %ecx, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    movd %eax, %xmm3
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movd %ecx, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pextrw $4, %xmm0, %eax
; SSE41-NEXT:    pinsrw $1, %eax, %xmm0
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    pinsrw $2, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm1, %eax
; SSE41-NEXT:    pinsrw $3, %eax, %xmm0
; SSE41-NEXT:    movd %xmm2, %eax
; SSE41-NEXT:    pinsrw $4, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm2, %eax
; SSE41-NEXT:    pinsrw $5, %eax, %xmm0
; SSE41-NEXT:    movd %xmm3, %eax
; SSE41-NEXT:    pinsrw $6, %eax, %xmm0
; SSE41-NEXT:    pextrw $4, %xmm3, %eax
; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

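; Truncating zeroinitializer should constant-fold, so each configuration is
; expected to reduce this to materializing an all-zeros register.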
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}