; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; ---------------------------------------------------------------------------
; 512-bit logic ops on i32 lanes. Each function first adds a splat of 1 to %a
; and then applies the logic op under test to that sum and %b.
; ---------------------------------------------------------------------------

; and <16 x i32>
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandd:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpandd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = and <16 x i32> %sum, %b
  ret <16 x i32> %res
}

; and <16 x i32> with the second operand inverted (xor with all-ones)
define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandnd:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpandnd %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %notb = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <16 x i32> %sum, %notb
  ret <16 x i32> %res
}

; or <16 x i32>
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpord:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpord %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = or <16 x i32> %sum, %b
  ret <16 x i32> %res
}

; xor <16 x i32>
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpxord:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = xor <16 x i32> %sum, %b
  ret <16 x i32> %res
}

; ---------------------------------------------------------------------------
; The same four patterns on i64 lanes.
; ---------------------------------------------------------------------------

; and <8 x i64>
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandq:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %res = and <8 x i64> %sum, %b
  ret <8 x i64> %res
}

; and <8 x i64> with the second operand inverted (xor with all-ones)
define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpandnq:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %notb = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %res = and <8 x i64> %sum, %notb
  ret <8 x i64> %res
}

; or <8 x i64>
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vporq:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vporq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %res = or <8 x i64> %sum, %b
  ret <8 x i64> %res
}

; xor <8 x i64>
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: vpxorq:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  ; The add exists only to force the execution domain.
  %sum = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %res = xor <8 x i64> %sum, %b
  ret <8 x i64> %res
}

; ---------------------------------------------------------------------------
; Memory and broadcast operand forms.
; ---------------------------------------------------------------------------

; or with a constant splat; the expected output uses a {1to8} memory operand.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; ALL-LABEL: orq_broadcast:
; ALL:       ## BB#0:
; ALL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT:    retq
  %res = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %res
}

; and with a full-width vector loaded from %x; the expected output has the
; load as the memory operand of the logic instruction.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; ALL-LABEL: andd512fold:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  %vec = load <16 x i32>, <16 x i32>* %x, align 4
  %res = and <16 x i32> %y, %vec
  ret <16 x i32> %res
}

; and with a scalar i64 loaded from %ap and splatted to every lane; the
; expected output uses a {1to8} memory operand.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; ALL-LABEL: andqbrst:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; ALL-NEXT:    retq
entry:
  %scalar = load i64, i64* %ap, align 8
  ; insertelement into lane 0 + zero-mask shuffle = splat of %scalar
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = and <8 x i64> %p1, %splat
  ret <8 x i64> %res
}

; ---------------------------------------------------------------------------
; <64 x i8> logic ops. The expected output is per -mcpu: two ymm instructions
; for knl, a single zmm instruction for skx.
; ---------------------------------------------------------------------------

define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: and_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: and_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %masked = and <64 x i8> %a, %b
  ret <64 x i8> %masked
}

define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: andn_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: andn_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; xor with an all-ones splat (64 lanes, written as 8 rows of 8) inverts %b
  %notb = xor <64 x i8> %b, <
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %masked = and <64 x i8> %a, %notb
  ret <64 x i8> %masked
}

define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: or_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: or_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %merged = or <64 x i8> %a, %b
  ret <64 x i8> %merged
}

define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
; KNL-LABEL: xor_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: xor_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %flipped = xor <64 x i8> %a, %b
  ret <64 x i8> %flipped
}

; ---------------------------------------------------------------------------
; <32 x i16> logic ops, with the same per -mcpu split in the expected output.
; ---------------------------------------------------------------------------

define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: and_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: and_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %masked = and <32 x i16> %a, %b
  ret <32 x i16> %masked
}

define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: andn_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: andn_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; xor with an all-ones splat (32 lanes, written as 4 rows of 8) inverts %b
  %notb = xor <32 x i16> %b, <
      i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
      i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
      i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
      i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %masked = and <32 x i16> %a, %notb
  ret <32 x i16> %masked
}

define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: or_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: or_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %merged = or <32 x i16> %a, %b
  ret <32 x i16> %merged
}

define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
; KNL-LABEL: xor_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: xor_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %flipped = xor <32 x i16> %a, %b
  ret <32 x i16> %flipped
}