; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; First, check the generic pattern for any 2 vector constants. Then, check special cases where
; the constants are all off-by-one. Finally, check the extra special cases where the constants
; include 0 or -1.
; Each minimal select test is repeated with a more typical pattern that includes a compare to
; generate the condition value.

; TODO: If we don't have blendv, this can definitely be improved. There's also a selection of
; chips where it makes sense to transform the general case blendv to 2 bit-ops. That should be
; a uarch-specific transform. At some point (Ryzen?), the implementation should catch up to the
; architecture, so blendv is as fast as a single bit-op.

define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_C1_or_C2_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_C1_or_C2_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_C1_or_C2_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_C1_or_C2_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cplus1_or_C_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cplus1_or_C_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cplus1_or_C_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cplus1_or_C_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cminus1_or_C_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cminus1_or_C_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cminus1_or_C_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cminus1_or_C_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %add
}

define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_minus1_or_0_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_minus1_or_0_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_minus1_or_0_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_minus1_or_0_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}

define <4 x i32> @sel_0_or_minus1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_minus1_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_minus1_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_minus1_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_minus1_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %add
}

define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_1_or_0_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_1_or_0_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_1_or_0_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_1_or_0_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}

define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_1_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    andnps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_1_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}

define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_1_vec:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_1_vec:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}