; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1

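; Truncate two <2 x i64> vectors to <2 x i32> and concatenate the results
; into a single <4 x i32>. Pre-SSE4.1 targets shuffle each input and merge
; with punpcklqdq; SSE4.1 and AVX blend the shuffled halves with pblendw.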
define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX-NEXT:    retq


entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

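; Truncate <2 x i64> to <2 x i32> and return it bitcast to i64: a single
; pshufd packs the low dwords, then the low qword moves to %rax.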
define i64 @trunc2i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq


entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

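; Truncate two <4 x i32> vectors to <4 x i16> and concatenate into <8 x i16>.
; SSE2 needs a pshuflw/pshufhw/pshufd chain per input; SSSE3 and later pack
; each input with a single pshufb (sharing one mask) before merging.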
define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq


entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
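; Truncate <4 x i32> to <4 x i16> returned as a bitcast i64: SSSE3 and later
; do it with one pshufb, while SSE2 falls back to the three-shuffle sequence.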
define i64 @trunc4i32(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq


entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

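; Truncate two <8 x i16> vectors to <8 x i8> and concatenate into <16 x i8>.
; SSE2 masks the low bytes with pand and packs with packuswb; SSSE3 and later
; use pshufb on each input and merge with punpcklqdq.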
define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq


entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
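; Truncate <8 x i16> to <8 x i8> returned as a bitcast i64: pand+packuswb on
; SSE2, a single pshufb on SSSE3 and later.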
define i64 @trunc8i16(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq


entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

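; Truncating and shuffling an all-zero <16 x i64> constant should constant
; fold down to a single zeroing of xmm0.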
define <16 x i8> @trunc16i64_const() {
; SSE-LABEL: trunc16i64_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}