; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ANY,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ANY,INT256,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=ANY,INT256,AVX512

define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: andpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: andps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: xorpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: xorpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: xorps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: xorps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: orpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: orpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: orps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: orps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andnotpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
; ANY-LABEL: andnotpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnpd (%rdi), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %0 = bitcast <4 x double> %y to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andnotps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
; ANY-LABEL: andnotps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnps (%rdi), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %0 = bitcast <8 x float> %y to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

;;; Test that basic 2 x i64 logic ops use the integer version on AVX

define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
; ANY-LABEL: vpandn:
; ANY:       # %bb.0:
; ANY-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
; ANY-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; ANY-NEXT:    retq
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
; ANY-LABEL: vpand:
; ANY:       # %bb.0:
; ANY-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; ANY-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; ANY-NEXT:    vpand %xmm1, %xmm0, %xmm0
; ANY-NEXT:    retq
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
; AVX1-LABEL: and_xor_splat1_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_xor_splat1_v4i32:
; INT256:       # %bb.0:
; INT256-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; INT256-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; INT256-NEXT:    retq
  %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %and
}

define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
; AVX1-LABEL: and_xor_splat1_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_xor_splat1_v4i64:
; INT256:       # %bb.0:
; INT256-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; INT256-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; INT256-NEXT:    retq
  %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %and
}

; PR37749 - https://bugs.llvm.org/show_bug.cgi?id=37749
; For AVX1, we don't want a 256-bit logic op with insert/extract to the surrounding 128-bit ops.

define <8 x i32> @and_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: and_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = and <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @or_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: or_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: or_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; INT256-NEXT:    vpor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = or <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @xor_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: xor_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: xor_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; INT256-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = xor <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @and_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: and_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7]
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; INT256-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = and <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @or_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: or_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [281470681808895,281470681808895]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: or_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
; INT256-NEXT:    vpor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = or <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @xor_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: xor_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [281470681808895,281470681808895]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: xor_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
; INT256-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = xor <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}