; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s

; Instruction-selection test for vector logic operations (and, xor, or,
; and-not). Both llc invocations above (+avx and +avx512f) are verified
; against the same set of assertions.
;
; Pattern used by the 256-bit tests: the floating-point arguments are bitcast
; to an integer vector, the logic operation is applied, and the result is
; bitcast back. The <4 x double> variants additionally add zero to the result
; to force the execution domain; the <8 x float> variants do not.

; AND of two <4 x double> values; vandpd is expected.
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; AND of a <4 x double> value with a constant vector; the constant is expected
; to be folded into vandpd as a RIP-relative memory operand.
define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; AND of two <8 x float> values; vandps is expected.
define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andps256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

; AND of an <8 x float> value with a constant vector; the constant is expected
; to be folded into vandps as a RIP-relative memory operand.
define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: andps256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %1
}

; XOR of two <4 x double> values; vxorpd is expected.
define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; XOR of a <4 x double> value with a constant vector; the constant is expected
; to be folded into vxorpd as a RIP-relative memory operand.
define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; XOR of two <8 x float> values; vxorps is expected.
define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorps256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %2
}

; XOR of an <8 x float> value with a constant vector; the constant is expected
; to be folded into vxorps as a RIP-relative memory operand.
define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: xorps256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %1
}

; OR of two <4 x double> values; vorpd is expected.
define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: orpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; OR of a <4 x double> value with a constant vector; the constant is expected
; to be folded into vorpd as a RIP-relative memory operand.
define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: orpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

; OR of two <8 x float> values; vorps is expected.
define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: orps256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %2
}

; OR of an <8 x float> value with a constant vector; the constant is expected
; to be folded into vorps as a RIP-relative memory operand.
define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; CHECK-LABEL: orps256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %1
}

; AND-NOT on <4 x double>: %x is inverted (xor with all-ones) and ANDed with
; %y; a single vandnpd is expected for the pair.
define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andnotpd256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  ; xor with all-ones is a bitwise NOT
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; AND-NOT on <4 x double> with a memory operand: %y is inverted and ANDed with
; the vector loaded from %x; the load is expected to be folded into vandnpd.
define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: andnotpd256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnpd (%rdi), %ymm0, %ymm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %0 = bitcast <4 x double> %y to <4 x i64>
  ; xor with all-ones is a bitwise NOT
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

; AND-NOT on <8 x float>: %x is inverted (xor with all-ones) and ANDed with
; %y; a single vandnps is expected for the pair.
define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andnotps256:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  ; xor with all-ones is a bitwise NOT
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

; AND-NOT on <8 x float> with a memory operand: %y is inverted and ANDed with
; the vector loaded from %x; the load is expected to be folded into vandnps.
define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
; CHECK-LABEL: andnotps256fold:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %0 = bitcast <8 x float> %y to <8 x i32>
  ; xor with all-ones is a bitwise NOT
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

;;; Test that basic 2 x i64 logic uses the integer version on AVX

; Computes %a & ~(%a + 1) on <2 x i64>; the integer instructions vpaddq and
; vpandn are expected. %b is not referenced by the body.
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandn:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  ; xor with all-ones is a bitwise NOT
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

; Computes (%a + 1) & %b on <2 x i64>; the integer instructions vpaddq and
; vpand are expected.
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpand:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}