; NOTE: code-browser navigation header removed; this is an LLVM CodeGen/X86 regression test.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512
      7 
      8 define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; Bitcast a zero-extended i2 scalar into a <2 x i1> mask vector.
; SSE2/SSSE3 and AVX1/AVX2: broadcast the scalar, AND with per-lane bit
; masks [1,2], compare for equality, then shift the sign bit down
; (psrlq $63) to produce 0/1 per lane. SSE2 has no pcmpeqq, so the
; 64-bit compare is emulated with pcmpeqd + pshufd[1,0,3,2] + pand.
; AVX512: the i2 goes straight into a mask register (kmovd) and the
; vector is materialized with a zero-masked all-ones move.
      9 ; SSE2-SSSE3-LABEL: bitcast_i2_2i1:
     10 ; SSE2-SSSE3:       # %bb.0:
     11 ; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
     12 ; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
     13 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
     14 ; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
     15 ; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
     16 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
     17 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
     18 ; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
     19 ; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
     20 ; SSE2-SSSE3-NEXT:    retq
     21 ;
     22 ; AVX1-LABEL: bitcast_i2_2i1:
     23 ; AVX1:       # %bb.0:
     24 ; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
     25 ; AVX1-NEXT:    vmovq %rdi, %xmm0
     26 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     27 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
     28 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
     29 ; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
     30 ; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
     31 ; AVX1-NEXT:    retq
     32 ;
     33 ; AVX2-LABEL: bitcast_i2_2i1:
     34 ; AVX2:       # %bb.0:
     35 ; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
     36 ; AVX2-NEXT:    vmovq %rdi, %xmm0
     37 ; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
     38 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
     39 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
     40 ; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
     41 ; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
     42 ; AVX2-NEXT:    retq
     43 ;
     44 ; AVX512-LABEL: bitcast_i2_2i1:
     45 ; AVX512:       # %bb.0:
     46 ; AVX512-NEXT:    kmovd %edi, %k1
     47 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
     48 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
     49 ; AVX512-NEXT:    retq
     50   %1 = bitcast i2 %a0 to <2 x i1>
     51   ret <2 x i1> %1
     52 }
     53 
     54 define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
; Bitcast a zero-extended i4 scalar into a <4 x i1> mask vector.
; Pre-AVX512 targets broadcast the scalar into all four i32 lanes
; (pshufd on SSE, vpbroadcastd on AVX2), AND with bit masks [1,2,4,8],
; compare for equality, and shift down the sign bit (psrld $31) to get
; 0/1 per lane. AVX512 loads the bits into k1 and uses a zero-masked
; move of all-ones to expand the mask.
     55 ; SSE2-SSSE3-LABEL: bitcast_i4_4i1:
     56 ; SSE2-SSSE3:       # %bb.0:
     57 ; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
     58 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     59 ; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
     60 ; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
     61 ; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
     62 ; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
     63 ; SSE2-SSSE3-NEXT:    retq
     64 ;
     65 ; AVX1-LABEL: bitcast_i4_4i1:
     66 ; AVX1:       # %bb.0:
     67 ; AVX1-NEXT:    vmovd %edi, %xmm0
     68 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
     69 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
     70 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
     71 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     72 ; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
     73 ; AVX1-NEXT:    retq
     74 ;
     75 ; AVX2-LABEL: bitcast_i4_4i1:
     76 ; AVX2:       # %bb.0:
     77 ; AVX2-NEXT:    vmovd %edi, %xmm0
     78 ; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
     79 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
     80 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
     81 ; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     82 ; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
     83 ; AVX2-NEXT:    retq
     84 ;
     85 ; AVX512-LABEL: bitcast_i4_4i1:
     86 ; AVX512:       # %bb.0:
     87 ; AVX512-NEXT:    kmovd %edi, %k1
     88 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
     89 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
     90 ; AVX512-NEXT:    retq
     91   %1 = bitcast i4 %a0 to <4 x i1>
     92   ret <4 x i1> %1
     93 }
     94 
     95 define <8 x i1> @bitcast_i8_8i1(i8 zeroext %a0) {
; Bitcast a zero-extended i8 scalar into a <8 x i1> mask vector.
; Pre-AVX512 targets broadcast the byte into all eight i16 lanes
; (pshuflw+pshufd on SSE, vpbroadcastw on AVX2), AND against powers of
; two [1..128], compare, and shift the sign bit down (psrlw $15).
; AVX512 (with avx512bw) converts the k-register directly with
; vpmovm2w.
     96 ; SSE2-SSSE3-LABEL: bitcast_i8_8i1:
     97 ; SSE2-SSSE3:       # %bb.0:
     98 ; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
     99 ; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    100 ; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    101 ; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
    102 ; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
    103 ; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
    104 ; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
    105 ; SSE2-SSSE3-NEXT:    retq
    106 ;
    107 ; AVX1-LABEL: bitcast_i8_8i1:
    108 ; AVX1:       # %bb.0:
    109 ; AVX1-NEXT:    vmovd %edi, %xmm0
    110 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
    111 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    112 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
    113 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
    114 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
    115 ; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
    116 ; AVX1-NEXT:    retq
    117 ;
    118 ; AVX2-LABEL: bitcast_i8_8i1:
    119 ; AVX2:       # %bb.0:
    120 ; AVX2-NEXT:    vmovd %edi, %xmm0
    121 ; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
    122 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
    123 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
    124 ; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
    125 ; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
    126 ; AVX2-NEXT:    retq
    127 ;
    128 ; AVX512-LABEL: bitcast_i8_8i1:
    129 ; AVX512:       # %bb.0:
    130 ; AVX512-NEXT:    kmovd %edi, %k0
    131 ; AVX512-NEXT:    vpmovm2w %k0, %xmm0
    132 ; AVX512-NEXT:    retq
    133   %1 = bitcast i8 %a0 to <8 x i1>
    134   ret <8 x i1> %1
    135 }
    136 
    137 define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; Bitcast a zero-extended i16 scalar into a <16 x i1> mask vector.
; SSE2 splats the two source bytes across the 16 i8 lanes via
; punpcklbw+pshuflw+pshufd; SSSE3 does the same splat with one pshufb.
; Both then AND with repeated bit masks [1..128,1..128], compare bytes,
; shift (psrlw $7 — there is no psrlb, hence the trailing pand with a
; memory constant to clear cross-byte bits). AVX2 broadcasts the same
; mask as a 64-bit constant (9241421688590303745 = 0x8040201008040201).
; AVX512 converts the k-register directly with vpmovm2b.
    138 ; SSE2-LABEL: bitcast_i16_16i1:
    139 ; SSE2:       # %bb.0:
    140 ; SSE2-NEXT:    movd %edi, %xmm0
    141 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    142 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
    143 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
    144 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
    145 ; SSE2-NEXT:    pand %xmm1, %xmm0
    146 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
    147 ; SSE2-NEXT:    psrlw $7, %xmm0
    148 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
    149 ; SSE2-NEXT:    retq
    150 ;
    151 ; SSSE3-LABEL: bitcast_i16_16i1:
    152 ; SSSE3:       # %bb.0:
    153 ; SSSE3-NEXT:    movd %edi, %xmm0
    154 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
    155 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
    156 ; SSSE3-NEXT:    pand %xmm1, %xmm0
    157 ; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
    158 ; SSSE3-NEXT:    psrlw $7, %xmm0
    159 ; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
    160 ; SSSE3-NEXT:    retq
    161 ;
    162 ; AVX1-LABEL: bitcast_i16_16i1:
    163 ; AVX1:       # %bb.0:
    164 ; AVX1-NEXT:    vmovd %edi, %xmm0
    165 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
    166 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
    167 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
    168 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
    169 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
    170 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    171 ; AVX1-NEXT:    retq
    172 ;
    173 ; AVX2-LABEL: bitcast_i16_16i1:
    174 ; AVX2:       # %bb.0:
    175 ; AVX2-NEXT:    vmovd %edi, %xmm0
    176 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
    177 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
    178 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
    179 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
    180 ; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
    181 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    182 ; AVX2-NEXT:    retq
    183 ;
    184 ; AVX512-LABEL: bitcast_i16_16i1:
    185 ; AVX512:       # %bb.0:
    186 ; AVX512-NEXT:    kmovd %edi, %k0
    187 ; AVX512-NEXT:    vpmovm2b %k0, %xmm0
    188 ; AVX512-NEXT:    retq
    189   %1 = bitcast i16 %a0 to <16 x i1>
    190   ret <16 x i1> %1
    191 }
    192 
    193 define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; Bitcast an i32 scalar into a <32 x i1> mask vector.
; On SSE2/SSSE3, <32 x i1> is returned indirectly: the caller passes a
; sret pointer in %rdi, so codegen is just a 32-bit store plus
; returning the pointer in %rax. AVX1 builds the 32 bytes in a ymm by
; splatting each source byte into a 128-bit half, ANDing with bit
; masks, and (lacking 256-bit integer compares) doing two 128-bit
; pcmpeqb-against-zero + NOT sequences before shifting and masking.
; AVX2 uses full 256-bit integer ops with the broadcast
; 0x8040201008040201 mask. AVX512 converts the k-register with
; vpmovm2b into a ymm.
    194 ; SSE2-SSSE3-LABEL: bitcast_i32_32i1:
    195 ; SSE2-SSSE3:       # %bb.0:
    196 ; SSE2-SSSE3-NEXT:    movl %esi, (%rdi)
    197 ; SSE2-SSSE3-NEXT:    movq %rdi, %rax
    198 ; SSE2-SSSE3-NEXT:    retq
    199 ;
    200 ; AVX1-LABEL: bitcast_i32_32i1:
    201 ; AVX1:       # %bb.0:
    202 ; AVX1-NEXT:    vmovd %edi, %xmm0
    203 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    204 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
    205 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
    206 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
    207 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
    208 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    209 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
    210 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    211 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    212 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
    213 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
    214 ; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
    215 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
    216 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    217 ; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
    218 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
    219 ; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
    220 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
    221 ; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
    222 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    223 ; AVX1-NEXT:    retq
    224 ;
    225 ; AVX2-LABEL: bitcast_i32_32i1:
    226 ; AVX2:       # %bb.0:
    227 ; AVX2-NEXT:    vmovd %edi, %xmm0
    228 ; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    229 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
    230 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
    231 ; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
    232 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
    233 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    234 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
    235 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
    236 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
    237 ; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
    238 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
    239 ; AVX2-NEXT:    retq
    240 ;
    241 ; AVX512-LABEL: bitcast_i32_32i1:
    242 ; AVX512:       # %bb.0:
    243 ; AVX512-NEXT:    kmovd %edi, %k0
    244 ; AVX512-NEXT:    vpmovm2b %k0, %ymm0
    245 ; AVX512-NEXT:    retq
    246   %1 = bitcast i32 %a0 to <32 x i1>
    247   ret <32 x i1> %1
    248 }
    249 
    250 define <64 x i1> @bitcast_i64_64i1(i64 %a0) {
; Bitcast an i64 scalar into a <64 x i1> mask vector.
; All non-AVX512BW targets return <64 x i1> indirectly: store the
; 64 bits through the sret pointer in %rdi and return that pointer in
; %rax. With avx512bw, the i64 moves into a 64-bit mask register
; (kmovq) and vpmovm2b expands it into a full zmm.
    251 ; SSE2-SSSE3-LABEL: bitcast_i64_64i1:
    252 ; SSE2-SSSE3:       # %bb.0:
    253 ; SSE2-SSSE3-NEXT:    movq %rsi, (%rdi)
    254 ; SSE2-SSSE3-NEXT:    movq %rdi, %rax
    255 ; SSE2-SSSE3-NEXT:    retq
    256 ;
    257 ; AVX12-LABEL: bitcast_i64_64i1:
    258 ; AVX12:       # %bb.0:
    259 ; AVX12-NEXT:    movq %rsi, (%rdi)
    260 ; AVX12-NEXT:    movq %rdi, %rax
    261 ; AVX12-NEXT:    retq
    262 ;
    263 ; AVX512-LABEL: bitcast_i64_64i1:
    264 ; AVX512:       # %bb.0:
    265 ; AVX512-NEXT:    kmovq %rdi, %k0
    266 ; AVX512-NEXT:    vpmovm2b %k0, %zmm0
    267 ; AVX512-NEXT:    retq
    268   %1 = bitcast i64 %a0 to <64 x i1>
    269   ret <64 x i1> %1
    270 }
    271