; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx  | FileCheck %s --check-prefix=ALL --check-prefix=AVX
      4 
      5 ; First, check the generic pattern for any 2 vector constants. Then, check special cases where
      6 ; the constants are all off-by-one. Finally, check the extra special cases where the constants
      7 ; include 0 or -1.
      8 ; Each minimal select test is repeated with a more typical pattern that includes a compare to
      9 ; generate the condition value.
     10 
     11 ; TODO: If we don't have blendv, this can definitely be improved. There's also a selection of
     12 ; chips where it makes sense to transform the general case blendv to 2 bit-ops. That should be
     13 ; a uarch-specific transform. At some point (Ryzen?), the implementation should catch up to the
     14 ; architecture, so blendv is as fast as a single bit-op.
     15 
     16 define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
        ; Generic case: select between two arbitrary constant vectors.
        ; The i1 lanes are sign-extended to a full mask (pslld/psrad by 31);
        ; SSE then does a pandn/pand/por bit-select, AVX uses vblendvps.
     17 ; SSE-LABEL: sel_C1_or_C2_vec:
     18 ; SSE:       # %bb.0:
     19 ; SSE-NEXT:    pslld $31, %xmm0
     20 ; SSE-NEXT:    psrad $31, %xmm0
     21 ; SSE-NEXT:    movdqa %xmm0, %xmm1
     22 ; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
     23 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
     24 ; SSE-NEXT:    por %xmm1, %xmm0
     25 ; SSE-NEXT:    retq
     26 ;
     27 ; AVX-LABEL: sel_C1_or_C2_vec:
     28 ; AVX:       # %bb.0:
     29 ; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
     30 ; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
     31 ; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
     32 ; AVX-NEXT:    retq
     33   %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
     34   ret <4 x i32> %add
     35 }
     36 
     37 define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
        ; Same generic-constant select, but the condition comes from pcmpeqd,
        ; which already yields an all-ones/all-zeros lane mask, so the
        ; shift-based sign-extension from the minimal test is not needed.
     38 ; SSE-LABEL: cmp_sel_C1_or_C2_vec:
     39 ; SSE:       # %bb.0:
     40 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
     41 ; SSE-NEXT:    movdqa %xmm0, %xmm1
     42 ; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
     43 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
     44 ; SSE-NEXT:    por %xmm1, %xmm0
     45 ; SSE-NEXT:    retq
     46 ;
     47 ; AVX-LABEL: cmp_sel_C1_or_C2_vec:
     48 ; AVX:       # %bb.0:
     49 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     50 ; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
     51 ; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
     52 ; AVX-NEXT:    retq
     53   %cond = icmp eq <4 x i32> %x, %y
     54   %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
     55   ret <4 x i32> %add
     56 }
     57 
     58 define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
        ; Off-by-one constants: select(cond, C+1, C) folds to C + zext(cond),
        ; so no blend is needed — just mask the i1 lanes (pand) and add C.
     59 ; SSE-LABEL: sel_Cplus1_or_C_vec:
     60 ; SSE:       # %bb.0:
     61 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
     62 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
     63 ; SSE-NEXT:    retq
     64 ;
     65 ; AVX-LABEL: sel_Cplus1_or_C_vec:
     66 ; AVX:       # %bb.0:
     67 ; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
     68 ; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
     69 ; AVX-NEXT:    retq
     70   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
     71   ret <4 x i32> %add
     72 }
     73 
     74 define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
        ; With a pcmpeqd mask (0 or -1 per lane), select(cond, C+1, C) becomes
        ; C - mask, hence the psubd of the compare result from constant C.
     75 ; SSE-LABEL: cmp_sel_Cplus1_or_C_vec:
     76 ; SSE:       # %bb.0:
     77 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
     78 ; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
     79 ; SSE-NEXT:    psubd %xmm0, %xmm1
     80 ; SSE-NEXT:    movdqa %xmm1, %xmm0
     81 ; SSE-NEXT:    retq
     82 ;
     83 ; AVX-LABEL: cmp_sel_Cplus1_or_C_vec:
     84 ; AVX:       # %bb.0:
     85 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     86 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
     87 ; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
     88 ; AVX-NEXT:    retq
     89   %cond = icmp eq <4 x i32> %x, %y
     90   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
     91   ret <4 x i32> %add
     92 }
     93 
     94 define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
        ; select(cond, C-1, C) folds to C + sext(cond): sign-extend the i1
        ; lanes (pslld/psrad by 31, giving 0 or -1), then paddd constant C.
     95 ; SSE-LABEL: sel_Cminus1_or_C_vec:
     96 ; SSE:       # %bb.0:
     97 ; SSE-NEXT:    pslld $31, %xmm0
     98 ; SSE-NEXT:    psrad $31, %xmm0
     99 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
    100 ; SSE-NEXT:    retq
    101 ;
    102 ; AVX-LABEL: sel_Cminus1_or_C_vec:
    103 ; AVX:       # %bb.0:
    104 ; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
    105 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
    106 ; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
    107 ; AVX-NEXT:    retq
    108   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
    109   ret <4 x i32> %add
    110 }
    111 
    112 define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
        ; The pcmpeqd mask is already 0/-1 per lane, so select(cond, C-1, C)
        ; is just paddd of the mask with constant C — two instructions total.
    113 ; SSE-LABEL: cmp_sel_Cminus1_or_C_vec:
    114 ; SSE:       # %bb.0:
    115 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
    116 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
    117 ; SSE-NEXT:    retq
    118 ;
    119 ; AVX-LABEL: cmp_sel_Cminus1_or_C_vec:
    120 ; AVX:       # %bb.0:
    121 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    122 ; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
    123 ; AVX-NEXT:    retq
    124   %cond = icmp eq <4 x i32> %x, %y
    125   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
    126   ret <4 x i32> %add
    127 }
    128 
    129 define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
        ; Extra-special case: select(cond, -1, 0) is exactly sext(cond), so
        ; the whole select is the pslld/psrad sign-extension of the i1 lanes.
    130 ; SSE-LABEL: sel_minus1_or_0_vec:
    131 ; SSE:       # %bb.0:
    132 ; SSE-NEXT:    pslld $31, %xmm0
    133 ; SSE-NEXT:    psrad $31, %xmm0
    134 ; SSE-NEXT:    retq
    135 ;
    136 ; AVX-LABEL: sel_minus1_or_0_vec:
    137 ; AVX:       # %bb.0:
    138 ; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
    139 ; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
    140 ; AVX-NEXT:    retq
    141   %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    142   ret <4 x i32> %add
    143 }
    144 
    145 define <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
        ; Best case: the pcmpeqd mask (-1 on equal, 0 otherwise) IS the
        ; select result, so the compare alone suffices.
    146 ; SSE-LABEL: cmp_sel_minus1_or_0_vec:
    147 ; SSE:       # %bb.0:
    148 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
    149 ; SSE-NEXT:    retq
    150 ;
    151 ; AVX-LABEL: cmp_sel_minus1_or_0_vec:
    152 ; AVX:       # %bb.0:
    153 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    154 ; AVX-NEXT:    retq
    155   %cond = icmp eq <4 x i32> %x, %y
    156   %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    157   ret <4 x i32> %add
    158 }
    159 
    160 define <4 x i32> @sel_0_or_minus1_vec(<4 x i1> %cond) {
        ; select(cond, 0, -1) = zext(cond) - 1: mask the i1 lanes to 0/1
        ; (pand), then add an all-ones vector (pcmpeqd x,x materializes -1).
    161 ; SSE-LABEL: sel_0_or_minus1_vec:
    162 ; SSE:       # %bb.0:
    163 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
    164 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
    165 ; SSE-NEXT:    paddd %xmm1, %xmm0
    166 ; SSE-NEXT:    retq
    167 ;
    168 ; AVX-LABEL: sel_0_or_minus1_vec:
    169 ; AVX:       # %bb.0:
    170 ; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    171 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
    172 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
    173 ; AVX-NEXT:    retq
    174   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
    175   ret <4 x i32> %add
    176 }
    177 
    178 define <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
        ; With a 0/-1 compare mask, select(cond, 0, -1) is just the bitwise
        ; NOT of the mask, done as pxor with all-ones (pcmpeqd x,x).
    179 ; SSE-LABEL: cmp_sel_0_or_minus1_vec:
    180 ; SSE:       # %bb.0:
    181 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
    182 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
    183 ; SSE-NEXT:    pxor %xmm1, %xmm0
    184 ; SSE-NEXT:    retq
    185 ;
    186 ; AVX-LABEL: cmp_sel_0_or_minus1_vec:
    187 ; AVX:       # %bb.0:
    188 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    189 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
    190 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
    191 ; AVX-NEXT:    retq
    192   %cond = icmp eq <4 x i32> %x, %y
    193   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
    194   ret <4 x i32> %add
    195 }
    196 
    197 define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
        ; select(cond, 1, 0) is zext(cond): a single AND with a splat of 1.
    198 ; SSE-LABEL: sel_1_or_0_vec:
    199 ; SSE:       # %bb.0:
    200 ; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
    201 ; SSE-NEXT:    retq
    202 ;
    203 ; AVX-LABEL: sel_1_or_0_vec:
    204 ; AVX:       # %bb.0:
    205 ; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
    206 ; AVX-NEXT:    retq
    207   %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    208   ret <4 x i32> %add
    209 }
    210 
    211 define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
        ; With a 0/-1 compare mask, select(cond, 1, 0) is a logical right
        ; shift by 31, leaving just the sign bit (0 or 1) in each lane.
    212 ; SSE-LABEL: cmp_sel_1_or_0_vec:
    213 ; SSE:       # %bb.0:
    214 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
    215 ; SSE-NEXT:    psrld $31, %xmm0
    216 ; SSE-NEXT:    retq
    217 ;
    218 ; AVX-LABEL: cmp_sel_1_or_0_vec:
    219 ; AVX:       # %bb.0:
    220 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    221 ; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
    222 ; AVX-NEXT:    retq
    223   %cond = icmp eq <4 x i32> %x, %y
    224   %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    225   ret <4 x i32> %add
    226 }
    227 
    228 define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
        ; select(cond, 0, 1) = zext(!cond): a single ANDN with a splat of 1.
    229 ; SSE-LABEL: sel_0_or_1_vec:
    230 ; SSE:       # %bb.0:
    231 ; SSE-NEXT:    andnps {{.*}}(%rip), %xmm0
    232 ; SSE-NEXT:    retq
    233 ;
    234 ; AVX-LABEL: sel_0_or_1_vec:
    235 ; AVX:       # %bb.0:
    236 ; AVX-NEXT:    vandnps {{.*}}(%rip), %xmm0, %xmm0
    237 ; AVX-NEXT:    retq
    238   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    239   ret <4 x i32> %add
    240 }
    241 
    242 define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
        ; With a 0/-1 compare mask, select(cond, 0, 1) is pandn of the mask
        ; against a splat of 1: equal lanes clear, unequal lanes get 1.
    243 ; SSE-LABEL: cmp_sel_0_or_1_vec:
    244 ; SSE:       # %bb.0:
    245 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
    246 ; SSE-NEXT:    pandn {{.*}}(%rip), %xmm0
    247 ; SSE-NEXT:    retq
    248 ;
    249 ; AVX-LABEL: cmp_sel_0_or_1_vec:
    250 ; AVX:       # %bb.0:
    251 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    252 ; AVX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
    253 ; AVX-NEXT:    retq
    254   %cond = icmp eq <4 x i32> %x, %y
    255   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    256   ret <4 x i32> %add
    257 }
    258 
    259