; (code-browser navigation header removed) LLVM X86 codegen test: trunc(shl x, c) narrowing.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
      4 
; trunc(shl x, 7): <4 x i64> shifted left by 7 then truncated to <4 x i32>.
; Since 7 < 32, the shift commutes with the truncation, so codegen performs
; the truncating shuffle first and then a single 32-bit-lane shift
; (SSE2: shufps + pslld $7; AVX2: vpshufd/vpermq + vpslld $7, xmm width).
; NOTE(review): addrspace(1) pointers are used here — presumably carried over
; from an AMDGPU-derived test; X86 treats them as ordinary pointers (verify).
      5 define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
      6 ; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
      7 ; SSE2:       # %bb.0:
      8 ; SSE2-NEXT:    movaps (%rsi), %xmm0
      9 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
     10 ; SSE2-NEXT:    pslld $7, %xmm0
     11 ; SSE2-NEXT:    movdqa %xmm0, (%rdi)
     12 ; SSE2-NEXT:    retq
     13 ;
     14 ; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
     15 ; AVX2:       # %bb.0:
     16 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = mem[0,2,2,3,4,6,6,7]
     17 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
     18 ; AVX2-NEXT:    vpslld $7, %xmm0, %xmm0
     19 ; AVX2-NEXT:    vmovdqa %xmm0, (%rdi)
     20 ; AVX2-NEXT:    vzeroupper
     21 ; AVX2-NEXT:    retq
     22   %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
     23   %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
     24   %trunc = trunc <4 x i64> %shl to <4 x i32>
     25   store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
     26   ret void
     27 }
     28 
; trunc(shl x, 15): <8 x i32> shifted by 15 then truncated to <8 x i16>.
; 15 < 16, so the shift is legal after truncation: both targets truncate
; first (SSE2: pslld/psrad/packssdw; AVX2: vpshufb + vpermq) and then do one
; 16-bit-lane shift (psllw/vpsllw $15).
     29 define <8 x i16> @trunc_shl_15_v8i16_v8i32(<8 x i32> %a) {
     30 ; SSE2-LABEL: trunc_shl_15_v8i16_v8i32:
     31 ; SSE2:       # %bb.0:
     32 ; SSE2-NEXT:    pslld $16, %xmm1
     33 ; SSE2-NEXT:    psrad $16, %xmm1
     34 ; SSE2-NEXT:    pslld $16, %xmm0
     35 ; SSE2-NEXT:    psrad $16, %xmm0
     36 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
     37 ; SSE2-NEXT:    psllw $15, %xmm0
     38 ; SSE2-NEXT:    retq
     39 ;
     40 ; AVX2-LABEL: trunc_shl_15_v8i16_v8i32:
     41 ; AVX2:       # %bb.0:
     42 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
     43 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
     44 ; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
     45 ; AVX2-NEXT:    vzeroupper
     46 ; AVX2-NEXT:    retq
     47   %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
     48   %conv = trunc <8 x i32> %shl to <8 x i16>
     49   ret <8 x i16> %conv
     50 }
     51 
; trunc(shl x, 16) to i16: the shift amount equals the result width, so the
; low 16 bits of every lane are zero and the whole result folds to zero.
; SSE2 achieves the full fold (xorps). The AVX2 sequence zeroes every byte of
; the live low xmm half via vpshufb; the single kept lane (ymm0[28,29]) lands
; in the dead upper ymm half discarded by the register kill — presumably a
; shuffle-lowering artifact rather than a live value (verify against llc).
     52 define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) {
     53 ; SSE2-LABEL: trunc_shl_16_v8i16_v8i32:
     54 ; SSE2:       # %bb.0:
     55 ; SSE2-NEXT:    xorps %xmm0, %xmm0
     56 ; SSE2-NEXT:    retq
     57 ;
     58 ; AVX2-LABEL: trunc_shl_16_v8i16_v8i32:
     59 ; AVX2:       # %bb.0:
     60 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29]
     61 ; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     62 ; AVX2-NEXT:    vzeroupper
     63 ; AVX2-NEXT:    retq
     64   %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
     65   %conv = trunc <8 x i32> %shl to <8 x i16>
     66   ret <8 x i16> %conv
     67 }
     68 
; trunc(shl x, 17) to i16: shift amount exceeds the result width (17 > 16),
; so the truncated result is zero in every lane. SSE2 folds this to xorps.
; The AVX2 path does not fold: it still emits vpslld $17 plus the full
; truncation shuffle — a missed optimization captured by these checks
; (NOTE(review): if AVX2 lowering later learns this fold, the script will
; regenerate these lines to a plain zeroing idiom).
     69 define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
     70 ; SSE2-LABEL: trunc_shl_17_v8i16_v8i32:
     71 ; SSE2:       # %bb.0:
     72 ; SSE2-NEXT:    xorps %xmm0, %xmm0
     73 ; SSE2-NEXT:    retq
     74 ;
     75 ; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
     76 ; AVX2:       # %bb.0:
     77 ; AVX2-NEXT:    vpslld $17, %ymm0, %ymm0
     78 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
     79 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
     80 ; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     81 ; AVX2-NEXT:    vzeroupper
     82 ; AVX2-NEXT:    retq
     83   %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
     84   %conv = trunc <8 x i32> %shl to <8 x i16>
     85   ret <8 x i16> %conv
     86 }
     87 
; Scalar trunc(shl x, 31) from i64 to i32: 31 < 32, so the i64 load, shift
; and truncating store narrow to a 32-bit load + shll $31 + 32-bit store on
; both targets (no 64-bit operations remain).
     88 define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
     89 ; SSE2-LABEL: trunc_shl_31_i32_i64:
     90 ; SSE2:       # %bb.0:
     91 ; SSE2-NEXT:    movl (%rsi), %eax
     92 ; SSE2-NEXT:    shll $31, %eax
     93 ; SSE2-NEXT:    movl %eax, (%rdi)
     94 ; SSE2-NEXT:    retq
     95 ;
     96 ; AVX2-LABEL: trunc_shl_31_i32_i64:
     97 ; AVX2:       # %bb.0:
     98 ; AVX2-NEXT:    movl (%rsi), %eax
     99 ; AVX2-NEXT:    shll $31, %eax
    100 ; AVX2-NEXT:    movl %eax, (%rdi)
    101 ; AVX2-NEXT:    retq
    102   %val = load i64, i64* %in
    103   %shl = shl i64 %val, 31
    104   %trunc = trunc i64 %shl to i32
    105   store i32 %trunc, i32* %out
    106   ret void
    107 }
    108 
; Scalar trunc(shl x, 32) from i64 to i32: shift amount equals the result
; width, so the stored value is known-zero; both targets fold the load and
; shift away entirely and store an immediate zero.
    109 define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
    110 ; SSE2-LABEL: trunc_shl_32_i32_i64:
    111 ; SSE2:       # %bb.0:
    112 ; SSE2-NEXT:    movl $0, (%rdi)
    113 ; SSE2-NEXT:    retq
    114 ;
    115 ; AVX2-LABEL: trunc_shl_32_i32_i64:
    116 ; AVX2:       # %bb.0:
    117 ; AVX2-NEXT:    movl $0, (%rdi)
    118 ; AVX2-NEXT:    retq
    119   %val = load i64, i64* %in
    120   %shl = shl i64 %val, 32
    121   %trunc = trunc i64 %shl to i32
    122   store i32 %trunc, i32* %out
    123   ret void
    124 }
    125 
; Scalar trunc(shl x, 15) from i64 to i16: 15 < 16, so the operation narrows.
; Codegen loads/shifts at 32-bit width (movl/shll — only the low 16 bits of
; %eax are stored, so the wider shift is harmless) and stores a word.
    126 define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
    127 ; SSE2-LABEL: trunc_shl_15_i16_i64:
    128 ; SSE2:       # %bb.0:
    129 ; SSE2-NEXT:    movl (%rsi), %eax
    130 ; SSE2-NEXT:    shll $15, %eax
    131 ; SSE2-NEXT:    movw %ax, (%rdi)
    132 ; SSE2-NEXT:    retq
    133 ;
    134 ; AVX2-LABEL: trunc_shl_15_i16_i64:
    135 ; AVX2:       # %bb.0:
    136 ; AVX2-NEXT:    movl (%rsi), %eax
    137 ; AVX2-NEXT:    shll $15, %eax
    138 ; AVX2-NEXT:    movw %ax, (%rdi)
    139 ; AVX2-NEXT:    retq
    140   %val = load i64, i64* %in
    141   %shl = shl i64 %val, 15
    142   %trunc = trunc i64 %shl to i16
    143   store i16 %trunc, i16* %out
    144   ret void
    145 }
    146 
; Scalar trunc(shl x, 16) from i64 to i16: shift amount equals the result
; width, so the stored word is known-zero; both targets fold to movw $0.
    147 define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
    148 ; SSE2-LABEL: trunc_shl_16_i16_i64:
    149 ; SSE2:       # %bb.0:
    150 ; SSE2-NEXT:    movw $0, (%rdi)
    151 ; SSE2-NEXT:    retq
    152 ;
    153 ; AVX2-LABEL: trunc_shl_16_i16_i64:
    154 ; AVX2:       # %bb.0:
    155 ; AVX2-NEXT:    movw $0, (%rdi)
    156 ; AVX2-NEXT:    retq
    157   %val = load i64, i64* %in
    158   %shl = shl i64 %val, 16
    159   %trunc = trunc i64 %shl to i16
    160   store i16 %trunc, i16* %out
    161   ret void
    162 }
    163 
; Scalar trunc(shl x, 7) from i64 to i8: 7 < 8, so the whole sequence
; narrows to a byte load + shlb $7 + byte store on both targets.
    164 define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
    165 ; SSE2-LABEL: trunc_shl_7_i8_i64:
    166 ; SSE2:       # %bb.0:
    167 ; SSE2-NEXT:    movb (%rsi), %al
    168 ; SSE2-NEXT:    shlb $7, %al
    169 ; SSE2-NEXT:    movb %al, (%rdi)
    170 ; SSE2-NEXT:    retq
    171 ;
    172 ; AVX2-LABEL: trunc_shl_7_i8_i64:
    173 ; AVX2:       # %bb.0:
    174 ; AVX2-NEXT:    movb (%rsi), %al
    175 ; AVX2-NEXT:    shlb $7, %al
    176 ; AVX2-NEXT:    movb %al, (%rdi)
    177 ; AVX2-NEXT:    retq
    178   %val = load i64, i64* %in
    179   %shl = shl i64 %val, 7
    180   %trunc = trunc i64 %shl to i8
    181   store i8 %trunc, i8* %out
    182   ret void
    183 }
    184 
; Scalar trunc(shl x, 8) from i64 to i8: shift amount equals the result
; width, so the stored byte is known-zero; both targets fold to movb $0.
    185 define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
    186 ; SSE2-LABEL: trunc_shl_8_i8_i64:
    187 ; SSE2:       # %bb.0:
    188 ; SSE2-NEXT:    movb $0, (%rdi)
    189 ; SSE2-NEXT:    retq
    190 ;
    191 ; AVX2-LABEL: trunc_shl_8_i8_i64:
    192 ; AVX2:       # %bb.0:
    193 ; AVX2-NEXT:    movb $0, (%rdi)
    194 ; AVX2-NEXT:    retq
    195   %val = load i64, i64* %in
    196   %shl = shl i64 %val, 8
    197   %trunc = trunc i64 %shl to i8
    198   store i8 %trunc, i8* %out
    199   ret void
    200 }
    201