Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
      8 
      9 define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
        ; Truncate <8 x i64> -> <8 x i32>.  CHECK lines autogenerated by
        ; update_llc_test_checks.py: AVX512BW lowers to a single vpmovqd;
        ; SSE/AVX subtargets use pshufd/blend/vpermd shuffle sequences.
     10 ; SSE2-LABEL: trunc8i64_8i32:
     11 ; SSE2:       # BB#0: # %entry
     12 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     13 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     14 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     15 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
     16 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
     17 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
     18 ; SSE2-NEXT:    retq
     19 ;
     20 ; SSSE3-LABEL: trunc8i64_8i32:
     21 ; SSSE3:       # BB#0: # %entry
     22 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     23 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     24 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     25 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
     26 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
     27 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
     28 ; SSSE3-NEXT:    retq
     29 ;
     30 ; SSE41-LABEL: trunc8i64_8i32:
     31 ; SSE41:       # BB#0: # %entry
     32 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
     33 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     34 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
     35 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
     36 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
     37 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
     38 ; SSE41-NEXT:    retq
     39 ;
     40 ; AVX1-LABEL: trunc8i64_8i32:
     41 ; AVX1:       # BB#0: # %entry
     42 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
     43 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
     44 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     45 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
     46 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
     47 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
     48 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     49 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
     50 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     51 ; AVX1-NEXT:    retq
     52 ;
     53 ; AVX2-LABEL: trunc8i64_8i32:
     54 ; AVX2:       # BB#0: # %entry
     55 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
     56 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
     57 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
     58 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
     59 ; AVX2-NEXT:    retq
     60 ;
     61 ; AVX512BW-LABEL: trunc8i64_8i32:
     62 ; AVX512BW:       # BB#0: # %entry
     63 ; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
     64 ; AVX512BW-NEXT:    retq
     65 entry:
     66   %0 = trunc <8 x i64> %a to <8 x i32>
     67   ret <8 x i32> %0
     68 }
     69 
     70 define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
        ; Truncate <8 x i64> -> <8 x i16>.  AVX512BW uses a single vpmovqw;
        ; SSE2/SSSE3 fall back to scalarized pextrw/movd + punpcklwd chains,
        ; SSE41/AVX1 mask-then-packusdw, AVX2 vpermd + vpshufb + vpermq.
     71 ; SSE2-LABEL: trunc8i64_8i16:
     72 ; SSE2:       # BB#0: # %entry
     73 ; SSE2-NEXT:    pextrw $4, %xmm1, %eax
     74 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
     75 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
     76 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
     77 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     78 ; SSE2-NEXT:    pextrw $4, %xmm3, %edx
     79 ; SSE2-NEXT:    movd %edx, %xmm1
     80 ; SSE2-NEXT:    movd %eax, %xmm3
     81 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
     82 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
     83 ; SSE2-NEXT:    movd %eax, %xmm1
     84 ; SSE2-NEXT:    movd %ecx, %xmm2
     85 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
     86 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
     87 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
     88 ; SSE2-NEXT:    retq
     89 ;
     90 ; SSSE3-LABEL: trunc8i64_8i16:
     91 ; SSSE3:       # BB#0: # %entry
     92 ; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
     93 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
     94 ; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
     95 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
     96 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     97 ; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
     98 ; SSSE3-NEXT:    movd %edx, %xmm1
     99 ; SSSE3-NEXT:    movd %eax, %xmm3
    100 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
    101 ; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
    102 ; SSSE3-NEXT:    movd %eax, %xmm1
    103 ; SSSE3-NEXT:    movd %ecx, %xmm2
    104 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
    105 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
    106 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    107 ; SSSE3-NEXT:    retq
    108 ;
    109 ; SSE41-LABEL: trunc8i64_8i16:
    110 ; SSE41:       # BB#0: # %entry
    111 ; SSE41-NEXT:    pxor %xmm4, %xmm4
    112 ; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
    113 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
    114 ; SSE41-NEXT:    packusdw %xmm3, %xmm2
    115 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
    116 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
    117 ; SSE41-NEXT:    packusdw %xmm1, %xmm0
    118 ; SSE41-NEXT:    packusdw %xmm2, %xmm0
    119 ; SSE41-NEXT:    retq
    120 ;
    121 ; AVX1-LABEL: trunc8i64_8i16:
    122 ; AVX1:       # BB#0: # %entry
    123 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    124 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    125 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
    126 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
    127 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
    128 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    129 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
    130 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
    131 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
    132 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
    133 ; AVX1-NEXT:    vzeroupper
    134 ; AVX1-NEXT:    retq
    135 ;
    136 ; AVX2-LABEL: trunc8i64_8i16:
    137 ; AVX2:       # BB#0: # %entry
    138 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
    139 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
    140 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
    141 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    142 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
    143 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
    144 ; AVX2-NEXT:    vzeroupper
    145 ; AVX2-NEXT:    retq
    146 ;
    147 ; AVX512BW-LABEL: trunc8i64_8i16:
    148 ; AVX512BW:       # BB#0: # %entry
    149 ; AVX512BW-NEXT:    vpmovqw %zmm0, %xmm0
    150 ; AVX512BW-NEXT:    retq
    151 entry:
    152   %0 = trunc <8 x i64> %a to <8 x i16>
    153   ret <8 x i16> %0
    154 }
    155 
    156 define void @trunc8i64_8i8(<8 x i64> %a) {
        ; Truncate <8 x i64> -> <8 x i8> and store through an undef pointer
        ; (the store keeps the trunc live; the address is irrelevant to the
        ; test).  AVX512BW lowers to vpmovqb with a memory destination.
    157 ; SSE-LABEL: trunc8i64_8i8:
    158 ; SSE:       # BB#0: # %entry
    159 ; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
    160 ; SSE-NEXT:    pand %xmm4, %xmm3
    161 ; SSE-NEXT:    pand %xmm4, %xmm2
    162 ; SSE-NEXT:    packuswb %xmm3, %xmm2
    163 ; SSE-NEXT:    pand %xmm4, %xmm1
    164 ; SSE-NEXT:    pand %xmm4, %xmm0
    165 ; SSE-NEXT:    packuswb %xmm1, %xmm0
    166 ; SSE-NEXT:    packuswb %xmm2, %xmm0
    167 ; SSE-NEXT:    packuswb %xmm0, %xmm0
    168 ; SSE-NEXT:    movq %xmm0, (%rax)
    169 ; SSE-NEXT:    retq
    170 ;
    171 ; AVX1-LABEL: trunc8i64_8i8:
    172 ; AVX1:       # BB#0: # %entry
    173 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    174 ; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
    175 ; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
    176 ; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
    177 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
    178 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    179 ; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
    180 ; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
    181 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
    182 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    183 ; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
    184 ; AVX1-NEXT:    vmovq %xmm0, (%rax)
    185 ; AVX1-NEXT:    vzeroupper
    186 ; AVX1-NEXT:    retq
    187 ;
    188 ; AVX2-LABEL: trunc8i64_8i8:
    189 ; AVX2:       # BB#0: # %entry
    190 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
    191 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
    192 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
    193 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
    194 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    195 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    196 ; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    197 ; AVX2-NEXT:    vmovq %xmm0, (%rax)
    198 ; AVX2-NEXT:    vzeroupper
    199 ; AVX2-NEXT:    retq
    200 ;
    201 ; AVX512BW-LABEL: trunc8i64_8i8:
    202 ; AVX512BW:       # BB#0: # %entry
    203 ; AVX512BW-NEXT:    vpmovqb %zmm0, (%rax)
    204 ; AVX512BW-NEXT:    retq
    205 entry:
    206   %0 = trunc <8 x i64> %a to <8 x i8>
    207   store <8 x i8> %0, <8 x i8>* undef, align 4
    208   ret void
    209 }
    210 
    211 define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
        ; Truncate <8 x i32> -> <8 x i16>.  SSE2 uses pslld/psrad+packssdw;
        ; SSSE3/SSE41/AVX1 use pshufb byte shuffles; AVX512BW a single vpmovdw.
    212 ; SSE2-LABEL: trunc8i32_8i16:
    213 ; SSE2:       # BB#0: # %entry
    214 ; SSE2-NEXT:    pslld $16, %xmm1
    215 ; SSE2-NEXT:    psrad $16, %xmm1
    216 ; SSE2-NEXT:    pslld $16, %xmm0
    217 ; SSE2-NEXT:    psrad $16, %xmm0
    218 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
    219 ; SSE2-NEXT:    retq
    220 ;
    221 ; SSSE3-LABEL: trunc8i32_8i16:
    222 ; SSSE3:       # BB#0: # %entry
    223 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    224 ; SSSE3-NEXT:    pshufb %xmm2, %xmm1
    225 ; SSSE3-NEXT:    pshufb %xmm2, %xmm0
    226 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    227 ; SSSE3-NEXT:    retq
    228 ;
    229 ; SSE41-LABEL: trunc8i32_8i16:
    230 ; SSE41:       # BB#0: # %entry
    231 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    232 ; SSE41-NEXT:    pshufb %xmm2, %xmm1
    233 ; SSE41-NEXT:    pshufb %xmm2, %xmm0
    234 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    235 ; SSE41-NEXT:    retq
    236 ;
    237 ; AVX1-LABEL: trunc8i32_8i16:
    238 ; AVX1:       # BB#0: # %entry
    239 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    240 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    241 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    242 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    243 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    244 ; AVX1-NEXT:    vzeroupper
    245 ; AVX1-NEXT:    retq
    246 ;
    247 ; AVX2-LABEL: trunc8i32_8i16:
    248 ; AVX2:       # BB#0: # %entry
    249 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
    250 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
    251 ; AVX2-NEXT:    vzeroupper
    252 ; AVX2-NEXT:    retq
    253 ;
    254 ; AVX512BW-LABEL: trunc8i32_8i16:
    255 ; AVX512BW:       # BB#0: # %entry
    256 ; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
    257 ; AVX512BW-NEXT:    retq
    258 entry:
    259   %0 = trunc <8 x i32> %a to <8 x i16>
    260   ret <8 x i16> %0
    261 }
    262 
    263 define void @trunc8i32_8i8(<8 x i32> %a) {
        ; Truncate <8 x i32> -> <8 x i8>, stored through an undef pointer so
        ; the trunc is not dead.  Note AVX512BW still needs a vpshufb after
        ; vpmovdw here (no direct d->b saturating store is emitted).
    264 ; SSE2-LABEL: trunc8i32_8i8:
    265 ; SSE2:       # BB#0: # %entry
    266 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
    267 ; SSE2-NEXT:    pand %xmm2, %xmm1
    268 ; SSE2-NEXT:    pand %xmm2, %xmm0
    269 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
    270 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    271 ; SSE2-NEXT:    movq %xmm0, (%rax)
    272 ; SSE2-NEXT:    retq
    273 ;
    274 ; SSSE3-LABEL: trunc8i32_8i8:
    275 ; SSSE3:       # BB#0: # %entry
    276 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
    277 ; SSSE3-NEXT:    pshufb %xmm2, %xmm1
    278 ; SSSE3-NEXT:    pshufb %xmm2, %xmm0
    279 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    280 ; SSSE3-NEXT:    movq %xmm0, (%rax)
    281 ; SSSE3-NEXT:    retq
    282 ;
    283 ; SSE41-LABEL: trunc8i32_8i8:
    284 ; SSE41:       # BB#0: # %entry
    285 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
    286 ; SSE41-NEXT:    pshufb %xmm2, %xmm1
    287 ; SSE41-NEXT:    pshufb %xmm2, %xmm0
    288 ; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    289 ; SSE41-NEXT:    movq %xmm0, (%rax)
    290 ; SSE41-NEXT:    retq
    291 ;
    292 ; AVX1-LABEL: trunc8i32_8i8:
    293 ; AVX1:       # BB#0: # %entry
    294 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    295 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
    296 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    297 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    298 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    299 ; AVX1-NEXT:    vmovq %xmm0, (%rax)
    300 ; AVX1-NEXT:    vzeroupper
    301 ; AVX1-NEXT:    retq
    302 ;
    303 ; AVX2-LABEL: trunc8i32_8i8:
    304 ; AVX2:       # BB#0: # %entry
    305 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
    306 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
    307 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    308 ; AVX2-NEXT:    vmovq %xmm0, (%rax)
    309 ; AVX2-NEXT:    vzeroupper
    310 ; AVX2-NEXT:    retq
    311 ;
    312 ; AVX512BW-LABEL: trunc8i32_8i8:
    313 ; AVX512BW:       # BB#0: # %entry
    314 ; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
    315 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    316 ; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
    317 ; AVX512BW-NEXT:    retq
    318 entry:
    319   %0 = trunc <8 x i32> %a to <8 x i8>
    320   store <8 x i8> %0, <8 x i8>* undef, align 4
    321   ret void
    322 }
    323 
    324 define void @trunc16i32_16i8(<16 x i32> %a) {
        ; Truncate <16 x i32> -> <16 x i8> with a store through an undef
        ; pointer.  AVX512BW lowers to a single vpmovdb to memory; SSE/AVX1
        ; mask and packuswb; AVX2 shuffles per 256-bit half then merges.
    325 ; SSE-LABEL: trunc16i32_16i8:
    326 ; SSE:       # BB#0: # %entry
    327 ; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
    328 ; SSE-NEXT:    pand %xmm4, %xmm3
    329 ; SSE-NEXT:    pand %xmm4, %xmm2
    330 ; SSE-NEXT:    packuswb %xmm3, %xmm2
    331 ; SSE-NEXT:    pand %xmm4, %xmm1
    332 ; SSE-NEXT:    pand %xmm4, %xmm0
    333 ; SSE-NEXT:    packuswb %xmm1, %xmm0
    334 ; SSE-NEXT:    packuswb %xmm2, %xmm0
    335 ; SSE-NEXT:    movdqu %xmm0, (%rax)
    336 ; SSE-NEXT:    retq
    337 ;
    338 ; AVX1-LABEL: trunc16i32_16i8:
    339 ; AVX1:       # BB#0: # %entry
    340 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    341 ; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
    342 ; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
    343 ; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
    344 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
    345 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    346 ; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
    347 ; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
    348 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
    349 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    350 ; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
    351 ; AVX1-NEXT:    vzeroupper
    352 ; AVX1-NEXT:    retq
    353 ;
    354 ; AVX2-LABEL: trunc16i32_16i8:
    355 ; AVX2:       # BB#0: # %entry
    356 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
    357 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
    358 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
    359 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
    360 ; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
    361 ; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
    362 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
    363 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
    364 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    365 ; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
    366 ; AVX2-NEXT:    vzeroupper
    367 ; AVX2-NEXT:    retq
    368 ;
    369 ; AVX512BW-LABEL: trunc16i32_16i8:
    370 ; AVX512BW:       # BB#0: # %entry
    371 ; AVX512BW-NEXT:    vpmovdb %zmm0, (%rax)
    372 ; AVX512BW-NEXT:    retq
    373 entry:
    374   %0 = trunc <16 x i32> %a to <16 x i8>
    375   store <16 x i8> %0, <16 x i8>* undef, align 4
    376   ret void
    377 }
    378 
    379 define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
        ; Truncate two <4 x i64> operands to <4 x i32> each and concatenate
        ; via shufflevector.  AVX512BW widens each half to zmm and uses
        ; vpmovqd twice plus vinsertf128.
    380 ; SSE2-LABEL: trunc2x4i64_8i32:
    381 ; SSE2:       # BB#0: # %entry
    382 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    383 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    384 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    385 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
    386 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
    387 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    388 ; SSE2-NEXT:    retq
    389 ;
    390 ; SSSE3-LABEL: trunc2x4i64_8i32:
    391 ; SSSE3:       # BB#0: # %entry
    392 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    393 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    394 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    395 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
    396 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
    397 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
    398 ; SSSE3-NEXT:    retq
    399 ;
    400 ; SSE41-LABEL: trunc2x4i64_8i32:
    401 ; SSE41:       # BB#0: # %entry
    402 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
    403 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    404 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    405 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
    406 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
    407 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
    408 ; SSE41-NEXT:    retq
    409 ;
    410 ; AVX1-LABEL: trunc2x4i64_8i32:
    411 ; AVX1:       # BB#0: # %entry
    412 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    413 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
    414 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    415 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
    416 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    417 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
    418 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    419 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    420 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    421 ; AVX1-NEXT:    retq
    422 ;
    423 ; AVX2-LABEL: trunc2x4i64_8i32:
    424 ; AVX2:       # BB#0: # %entry
    425 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
    426 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
    427 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
    428 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    429 ; AVX2-NEXT:    retq
    430 ;
    431 ; AVX512BW-LABEL: trunc2x4i64_8i32:
    432 ; AVX512BW:       # BB#0: # %entry
    433 ; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
    434 ; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
    435 ; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    436 ; AVX512BW-NEXT:    retq
    437 entry:
    438   %0 = trunc <4 x i64> %a to <4 x i32>
    439   %1 = trunc <4 x i64> %b to <4 x i32>
    440   %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    441   ret <8 x i32> %2
    442 }
    443 
    444 define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
        ; Truncate two <4 x i64> operands to <4 x i16> and concatenate.
        ; SSE2/SSSE3 scalarize via pextrw/movd; SSE41 uses a pinsrw chain;
        ; AVX1/AVX2/AVX512BW narrow to i32 first, then pshufb to i16.
    445 ; SSE2-LABEL: trunc2x4i64_8i16:
    446 ; SSE2:       # BB#0: # %entry
    447 ; SSE2-NEXT:    pextrw $4, %xmm1, %eax
    448 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
    449 ; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
    450 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    451 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    452 ; SSE2-NEXT:    pextrw $4, %xmm3, %edx
    453 ; SSE2-NEXT:    movd %edx, %xmm1
    454 ; SSE2-NEXT:    movd %eax, %xmm3
    455 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
    456 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
    457 ; SSE2-NEXT:    movd %eax, %xmm1
    458 ; SSE2-NEXT:    movd %ecx, %xmm2
    459 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
    460 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
    461 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    462 ; SSE2-NEXT:    retq
    463 ;
    464 ; SSSE3-LABEL: trunc2x4i64_8i16:
    465 ; SSSE3:       # BB#0: # %entry
    466 ; SSSE3-NEXT:    pextrw $4, %xmm1, %eax
    467 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
    468 ; SSSE3-NEXT:    pextrw $4, %xmm0, %ecx
    469 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    470 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    471 ; SSSE3-NEXT:    pextrw $4, %xmm3, %edx
    472 ; SSSE3-NEXT:    movd %edx, %xmm1
    473 ; SSSE3-NEXT:    movd %eax, %xmm3
    474 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
    475 ; SSSE3-NEXT:    pextrw $4, %xmm2, %eax
    476 ; SSSE3-NEXT:    movd %eax, %xmm1
    477 ; SSSE3-NEXT:    movd %ecx, %xmm2
    478 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
    479 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
    480 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    481 ; SSSE3-NEXT:    retq
    482 ;
    483 ; SSE41-LABEL: trunc2x4i64_8i16:
    484 ; SSE41:       # BB#0: # %entry
    485 ; SSE41-NEXT:    pextrw $4, %xmm0, %eax
    486 ; SSE41-NEXT:    pinsrw $1, %eax, %xmm0
    487 ; SSE41-NEXT:    movd %xmm1, %eax
    488 ; SSE41-NEXT:    pinsrw $2, %eax, %xmm0
    489 ; SSE41-NEXT:    pextrw $4, %xmm1, %eax
    490 ; SSE41-NEXT:    pinsrw $3, %eax, %xmm0
    491 ; SSE41-NEXT:    movd %xmm2, %eax
    492 ; SSE41-NEXT:    pinsrw $4, %eax, %xmm0
    493 ; SSE41-NEXT:    pextrw $4, %xmm2, %eax
    494 ; SSE41-NEXT:    pinsrw $5, %eax, %xmm0
    495 ; SSE41-NEXT:    movd %xmm3, %eax
    496 ; SSE41-NEXT:    pinsrw $6, %eax, %xmm0
    497 ; SSE41-NEXT:    pextrw $4, %xmm3, %eax
    498 ; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
    499 ; SSE41-NEXT:    retq
    500 ;
    501 ; AVX1-LABEL: trunc2x4i64_8i16:
    502 ; AVX1:       # BB#0: # %entry
    503 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    504 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
    505 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    506 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
    507 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    508 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
    509 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    510 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    511 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    512 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    513 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    514 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    515 ; AVX1-NEXT:    vzeroupper
    516 ; AVX1-NEXT:    retq
    517 ;
    518 ; AVX2-LABEL: trunc2x4i64_8i16:
    519 ; AVX2:       # BB#0: # %entry
    520 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
    521 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
    522 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
    523 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    524 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    525 ; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    526 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    527 ; AVX2-NEXT:    vzeroupper
    528 ; AVX2-NEXT:    retq
    529 ;
    530 ; AVX512BW-LABEL: trunc2x4i64_8i16:
    531 ; AVX512BW:       # BB#0: # %entry
    532 ; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
    533 ; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
    534 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    535 ; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    536 ; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    537 ; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    538 ; AVX512BW-NEXT:    retq
    539 entry:
    540   %0 = trunc <4 x i64> %a to <4 x i16>
    541   %1 = trunc <4 x i64> %b to <4 x i16>
    542   %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    543   ret <8 x i16> %2
    544 }
    545 
    546 define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
        ; Truncate two <2 x i64> operands to <2 x i32> and concatenate.
        ; All 128-bit subtargets use two pshufds plus a qdq-unpack or blend;
        ; AVX2/AVX512BW prefer vpblendd over vpblendw.
    547 ; SSE2-LABEL: trunc2x2i64_4i32:
    548 ; SSE2:       # BB#0: # %entry
    549 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    550 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    551 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    552 ; SSE2-NEXT:    retq
    553 ;
    554 ; SSSE3-LABEL: trunc2x2i64_4i32:
    555 ; SSSE3:       # BB#0: # %entry
    556 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    557 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    558 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    559 ; SSSE3-NEXT:    retq
    560 ;
    561 ; SSE41-LABEL: trunc2x2i64_4i32:
    562 ; SSE41:       # BB#0: # %entry
    563 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
    564 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    565 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    566 ; SSE41-NEXT:    retq
    567 ;
    568 ; AVX1-LABEL: trunc2x2i64_4i32:
    569 ; AVX1:       # BB#0: # %entry
    570 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
    571 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    572 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    573 ; AVX1-NEXT:    retq
    574 ;
    575 ; AVX2-LABEL: trunc2x2i64_4i32:
    576 ; AVX2:       # BB#0: # %entry
    577 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
    578 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    579 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    580 ; AVX2-NEXT:    retq
    581 ;
    582 ; AVX512BW-LABEL: trunc2x2i64_4i32:
    583 ; AVX512BW:       # BB#0: # %entry
    584 ; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
    585 ; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    586 ; AVX512BW-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    587 ; AVX512BW-NEXT:    retq
    588 entry:
    589   %0 = trunc <2 x i64> %a to <2 x i32>
    590   %1 = trunc <2 x i64> %b to <2 x i32>
    591   %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    592   ret <4 x i32> %2
    593 }
    594 
    595 define i64 @trunc2i64_i64(<2 x i64> %inval) {
        ; Truncate <2 x i64> -> <2 x i32> then bitcast to scalar i64;
        ; all subtargets lower to a single pshufd + GPR move.
    596 ; SSE-LABEL: trunc2i64_i64:
    597 ; SSE:       # BB#0: # %entry
    598 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    599 ; SSE-NEXT:    movd %xmm0, %rax
    600 ; SSE-NEXT:    retq
    601 ;
    602 ; AVX-LABEL: trunc2i64_i64:
    603 ; AVX:       # BB#0: # %entry
    604 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    605 ; AVX-NEXT:    vmovq %xmm0, %rax
    606 ; AVX-NEXT:    retq
    607 ;
    608 ; AVX512BW-LABEL: trunc2i64_i64:
    609 ; AVX512BW:       # BB#0: # %entry
    610 ; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    611 ; AVX512BW-NEXT:    vmovq %xmm0, %rax
    612 ; AVX512BW-NEXT:    retq
    613 entry:
    614   %0 = trunc <2 x i64> %inval to <2 x i32>
    615   %1 = bitcast <2 x i32> %0 to i64
    616   ret i64 %1
    617 }
    618 
    619 define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
        ; Truncate two <4 x i32> operands to <4 x i16> and concatenate.
        ; SSE2 uses pshuflw/pshufhw/pshufd; everything with pshufb shares one
        ; shuffle mask between the two inputs and joins with punpcklqdq.
    620 ; SSE2-LABEL: trunc2x4i32_8i16:
    621 ; SSE2:       # BB#0: # %entry
    622 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
    623 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
    624 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    625 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
    626 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
    627 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    628 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    629 ; SSE2-NEXT:    retq
    630 ;
    631 ; SSSE3-LABEL: trunc2x4i32_8i16:
    632 ; SSSE3:       # BB#0: # %entry
    633 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    634 ; SSSE3-NEXT:    pshufb %xmm2, %xmm1
    635 ; SSSE3-NEXT:    pshufb %xmm2, %xmm0
    636 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    637 ; SSSE3-NEXT:    retq
    638 ;
    639 ; SSE41-LABEL: trunc2x4i32_8i16:
    640 ; SSE41:       # BB#0: # %entry
    641 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    642 ; SSE41-NEXT:    pshufb %xmm2, %xmm1
    643 ; SSE41-NEXT:    pshufb %xmm2, %xmm0
    644 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    645 ; SSE41-NEXT:    retq
    646 ;
    647 ; AVX-LABEL: trunc2x4i32_8i16:
    648 ; AVX:       # BB#0: # %entry
    649 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    650 ; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    651 ; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    652 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    653 ; AVX-NEXT:    retq
    654 ;
    655 ; AVX512BW-LABEL: trunc2x4i32_8i16:
    656 ; AVX512BW:       # BB#0: # %entry
    657 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    658 ; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    659 ; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    660 ; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    661 ; AVX512BW-NEXT:    retq
    662 entry:
    663   %0 = trunc <4 x i32> %a to <4 x i16>
    664   %1 = trunc <4 x i32> %b to <4 x i16>
    665   %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    666   ret <8 x i16> %2
    667 }
    668 
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <4 x i32> to <4 x i16>, then bitcast the 64-bit result to an i64
; return value. Verifies the lowering is an in-register lane shuffle
; (pshuflw/pshufhw/pshufd on SSE2, a single pshufb elsewhere) followed by one
; GPR extract (movd/vmovq) — the scalarization regression tracked by PR15524.
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}
    707 
; Truncate two <8 x i16> arguments to <8 x i8> each and concatenate them into
; one <16 x i8>. SSE2 masks the low bytes with pand and merges via packuswb;
; SSSE3/SSE4.1/AVX/AVX512BW share one byte-shuffle (pshufb) mask and merge
; with punpcklqdq.
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
    754 
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <8 x i16> to <8 x i8>, then bitcast the 64-bit result to an i64
; return value. SSE2 uses pand+packuswb to pack the low bytes; SSSE3 and later
; use a single pshufb. Either way the result is extracted with one
; movd/vmovq rather than scalarized (the PR15524 regression).
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
    792 
; Constant-folding check: truncating zeroinitializer <16 x i64> to <16 x i8>
; and shuffling it (even with undef lanes in the mask) must fold away entirely
; to an all-zero vector — a single xorps/vxorps, no shuffle code.
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}
    814