; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s

define i32 @and_self(i32 %x) {
; CHECK-LABEL: and_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %and = and i32 %x, %x
  ret i32 %and
}

define <4 x i32> @and_self_vec(<4 x i32> %x) {
; CHECK-LABEL: and_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %and = and <4 x i32> %x, %x
  ret <4 x i32> %and
}

;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;

define <4 x i32> @test1(<4 x i32> %A) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test2(<4 x i32> %A) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test3(<4 x i32> %A) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test4(<4 x i32> %A) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test5(<4 x i32> %A) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test6(<4 x i32> %A) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test7(<4 x i32> %A) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test8(<4 x i32> %A) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test9(<4 x i32> %A) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test10(<4 x i32> %A) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test11(<4 x i32> %A) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test12(<4 x i32> %A) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test13(<4 x i32> %A) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test14(<4 x i32> %A) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}
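
; Verify that a pair of ANDs with complementary constant masks, combined
; with OR, is folded into a single variable blend of the two inputs.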
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; CHECK-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

;
; fold (and (or x, C), D) -> D if (C & D) == D
;

define <2 x i64> @and_or_v2i64(<2 x i64> %a0) {
; CHECK-LABEL: and_or_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [8,8]
; CHECK-NEXT:    retq
  %1 = or <2 x i64> %a0, <i64 255, i64 255>
  %2 = and <2 x i64> %1, <i64 8, i64 8>
  ret <2 x i64> %2
}

define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: and_or_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: and_or_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; CHECK-NEXT:    retq
  %1 = or <8 x i16> %a0, <i16 255, i16 127, i16 63, i16 31, i16 15, i16 31, i16 63, i16 -1>
  %2 = and <8 x i16> %1, <i16 15, i16 7, i16 3, i16 1, i16 14, i16 10, i16 2, i16 32767>
  ret <8 x i16> %2
}

;
; known bits folding
;

define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: and_or_zext_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <2 x i32> %a0 to <2 x i64>
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = and <2 x i64> %2, <i64 4294967296, i64 4294967296>
  ret <2 x i64> %3
}

define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: and_or_zext_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = and <4 x i32> %2, <i32 65536, i32 65536, i32 65536, i32 65536>
  ret <4 x i32> %3
}

;
; known sign bits folding
;

define <8 x i16> @ashr_mask1_v8i16(<8 x i16> %a0) {
; CHECK-LABEL: ashr_mask1_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $15, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %2
}
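
; After an arithmetic shift right by 31, every bit of each lane equals the
; sign bit, so masking with 7 is equivalent to a logical shift right by 29.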
define <4 x i32> @ashr_mask7_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: ashr_mask7_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrad $31, %xmm0
; CHECK-NEXT:    psrld $29, %xmm0
; CHECK-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = and <4 x i32> %1, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %2
}

;
; SimplifyDemandedBits
;

; PR34620 - redundant PAND after vector shift of a byte vector (PSRLW)
define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: PR34620:
; CHECK:       # %bb.0:
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-NEXT:    paddb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = add <16 x i8> %2, %a1
  ret <16 x i8> %3
}