; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX

; This test is an assembly of avx512 instructions to check their scheduling.
;
; Every function below is compiled twice (see the two llc invocations above):
; once for -mcpu=x86-64 with the AVX-512 features enabled explicitly, and once
; for -mcpu=skx. The expected assembly for each configuration is listed inside
; the function it belongs to; the "# sched: [a:b]" suffix is what llc prints
; under -print-schedule (presumably latency : reciprocal throughput -- confirm
; against the LLVM revision this test targets).
; The expectation lines are generated; regenerate them with the script named
; in the NOTE line rather than editing them by hand.

; fadd on <8 x double>, both operands in registers (vaddpd zmm, zmm, zmm).
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: addpd512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; fadd on <8 x double> with a constant vector; the constant is expected to be
; folded as a RIP-relative memory operand.
define <8 x double> @addpd512fold(<8 x double> %y) {
; GENERIC-LABEL: addpd512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; fadd on <16 x float>, both operands in registers (vaddps zmm, zmm, zmm).
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: addps512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addps512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; fadd on <16 x float> with a constant vector folded as a RIP-relative operand.
define <16 x float> @addps512fold(<16 x float> %y) {
; GENERIC-LABEL: addps512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addps512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; fsub on <8 x double>, both operands in registers (vsubpd zmm, zmm, zmm).
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: subpd512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; fsub on <8 x double> whose second operand is loaded from %x; the load is
; expected to be folded into vsubpd as a (%rdi) memory operand.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; GENERIC-LABEL: subpd512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; fsub on <16 x float>, both operands in registers (vsubps zmm, zmm, zmm).
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: subps512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subps512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; fsub on <16 x float> with the second operand loaded from %x and folded into
; vsubps as a (%rdi) memory operand.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; GENERIC-LABEL: subps512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: subps512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; mul on <8 x i64> (vpmullq on zmm registers).
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; GENERIC-LABEL: imulq512:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq512:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; mul on <4 x i64> (vpmullq on ymm registers).
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; GENERIC-LABEL: imulq256:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq256:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

; mul on <2 x i64> (vpmullq on xmm registers).
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; GENERIC-LABEL: imulq128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: imulq128:
; SKX: # %bb.0:
; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

; fmul on <8 x double>, both operands in registers (vmulpd zmm, zmm, zmm).
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: mulpd512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; fmul on <8 x double> with a constant vector folded as a RIP-relative operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; GENERIC-LABEL: mulpd512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; fmul on <16 x float>, both operands in registers (vmulps zmm, zmm, zmm).
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: mulps512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; fmul on <16 x float> with a constant vector folded as a RIP-relative operand.
define <16 x float> @mulps512fold(<16 x float> %y) {
; GENERIC-LABEL: mulps512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

; fdiv on <8 x double>, both operands in registers (vdivpd zmm, zmm, zmm).
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: divpd512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; fdiv on <8 x double> by a constant vector folded as a RIP-relative operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; GENERIC-LABEL: divpd512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; fdiv on <16 x float>, both operands in registers (vdivps zmm, zmm, zmm).
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: divps512:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; fdiv on <16 x float> by a constant vector folded as a RIP-relative operand.
define <16 x float> @divps512fold(<16 x float> %y) {
; GENERIC-LABEL: divps512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: divps512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; add on <8 x i64>, both operands in registers (vpaddq zmm, zmm, zmm).
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpaddq_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; add on <8 x i64> with the second operand loaded from %j and folded into
; vpaddq as a (%rdi) memory operand.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; GENERIC-LABEL: vpaddq_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_fold_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; add on <8 x i64> with a splat constant; expected to use the {1to8}
; embedded-broadcast form of vpaddq with a RIP-relative operand.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; GENERIC-LABEL: vpaddq_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

; add on <8 x i64> where the second operand is a scalar loaded from %j and
; inserted into all eight lanes; expected to become vpaddq (%rdi){1to8}.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; GENERIC-LABEL: vpaddq_broadcast2_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast2_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; add on <16 x i32>, both operands in registers (vpaddd zmm, zmm, zmm).
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpaddd_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; add on <16 x i32> with the second operand loaded from %j and folded into
; vpaddd as a (%rdi) memory operand.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; GENERIC-LABEL: vpaddd_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_fold_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; add on <16 x i32> with a splat constant; expected to use the {1to16}
; embedded-broadcast form of vpaddd.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; GENERIC-LABEL: vpaddd_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_broadcast_test:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

; Merge-masked add: lanes where %mask1 is non-zero take %i + %j, the other
; lanes keep %i. Expected as vptestmd into %k1 followed by vpaddd {%k1}.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked add: lanes where %mask1 is zero produce 0 instead of %i + %j.
; Expected as vptestmd into %k1 followed by vpaddd {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Merge-masked add with the second operand loaded from %j.ptr and folded into
; vpaddd as a (%rdi) memory operand.
; NOTE(review): the function is marked readnone but its body loads through
; %j.ptr; the attribute looks inaccurate -- confirm before changing it, since
; the expectations below were generated with it in place.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_fold_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Merge-masked add of a splat constant; expected as vpaddd with a {1to16}
; broadcast operand under {%k1}.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_broadcast_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked add with the second operand loaded from %j.ptr and folded into
; vpaddd as a (%rdi) memory operand.
; NOTE(review): the function is marked readnone but its body loads through
; %j.ptr; the attribute looks inaccurate -- confirm before changing it, since
; the expectations below were generated with it in place.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_fold_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Zero-masked add of a splat constant; expected as vpaddd with a {1to16}
; broadcast operand under {%k1} {z}.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_broadcast_test:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; sub on <8 x i64>, both operands in registers (vpsubq zmm, zmm, zmm).
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpsubq_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubq_test:
; SKX: # %bb.0:
; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; sub on <16 x i32>, both operands in registers (vpsubd zmm, zmm, zmm).
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpsubd_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubd_test:
; SKX: # %bb.0:
; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; mul on <16 x i32>, both operands in registers (vpmulld zmm, zmm, zmm).
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; GENERIC-LABEL: vpmulld_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpmulld_test:
; SKX: # %bb.0:
; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Scalar float square root through a readnone call to @sqrtf; expected to be
; emitted inline as vsqrtss rather than as a call.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; GENERIC-LABEL: sqrtA:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtA:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

; Scalar double square root through a readnone call to @sqrt; expected to be
; emitted inline as vsqrtsd rather than as a call.
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; GENERIC-LABEL: sqrtB:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtB:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

; Scalar float square root through the llvm.sqrt.f32 intrinsic (vsqrtss).
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; GENERIC-LABEL: sqrtC:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtC:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; <16 x float> square root through the llvm.sqrt.v16f32 intrinsic (vsqrtps zmm).
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; GENERIC-LABEL: sqrtD:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtD:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [20:12.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

; <8 x double> square root through the llvm.sqrt.v8f64 intrinsic (vsqrtpd zmm).
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; GENERIC-LABEL: sqrtE:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtE:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [32:24.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; fadd on <16 x float> with a splat constant; expected to use the {1to16}
; embedded-broadcast form of vaddps.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; GENERIC-LABEL: fadd_broadcast:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fadd_broadcast:
; SKX: # %bb.0:
; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; add on <8 x i64> with a splat constant; expected to use the {1to8}
; embedded-broadcast form of vpaddq.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: addq_broadcast:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addq_broadcast:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; Integer or on <8 x i64> with a splat constant; the expected output uses the
; floating-point-domain vorpd with a {1to8} broadcast operand.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: orq_broadcast:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: orq_broadcast:
; SKX: # %bb.0:
; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; Integer and on <16 x i32> with the second operand loaded from %x; the
; expected output is vandps with the load folded as a (%rdi) operand.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; GENERIC-LABEL: andd512fold:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andd512fold:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; Integer and on <8 x i64> with a scalar loaded from %ap and splatted via
; insertelement + shufflevector; expected as vandpd (%rdi){1to8}.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; GENERIC-LABEL: andqbrst:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andqbrst:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Merge-masked fadd on <16 x float>: lanes with a non-zero %mask1 element take
; %i + %j, the others keep %dst.
; The parameter list of this define continues after the generated expectation
; lines below; the layout is deliberate, do not join the pieces.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; GENERIC-LABEL: test_mask_vaddps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vaddps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked fmul on <16 x float>; unselected lanes keep %dst.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vmulps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmulps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked minimum on <16 x float>, written as fcmp olt + select; expected
; to be matched to vminps {%k1}.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vminps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked minimum on <8 x double>; the mask source is <8 x i32>, so the
; expected vptestmd operates on ymm while vminpd operates on zmm.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vminpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; Merge-masked maximum on <16 x float>, written as fcmp ogt + select; expected
; to be matched to vmaxps {%k1}.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vmaxps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmaxps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked maximum on <8 x double>; the mask source is <8 x i32>, so the
; expected vptestmd operates on ymm while vmaxpd operates on zmm.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vmaxpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

; Merge-masked fsub on <16 x float>; unselected lanes keep %dst.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vsubps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vsubps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked fdiv on <16 x float>; unselected lanes keep %dst.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vdivps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vdivps:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked fadd on <8 x double> with an <8 x i64> mask source (vptestmq);
; unselected lanes keep %dst.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; Zero-masked fadd on <8 x double>; unselected lanes become zero ({%k1} {z}).
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
; GENERIC-LABEL: test_maskz_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_maskz_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Merge-masked fadd on <8 x double> with the second operand loaded from %j and
; folded into vaddpd as a (%rdi) memory operand; unselected lanes keep %dst.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
; GENERIC-LABEL: test_mask_fold_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_fold_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; Zero-masked fadd on <8 x double> with the second operand loaded from %j and
; folded into vaddpd as a (%rdi) memory operand.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
; GENERIC-LABEL: test_maskz_fold_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_maskz_fold_vaddpd:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; GENERIC-LABEL: test_broadcast_vaddpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 #
sched: [10:1.00] 933 ; GENERIC-NEXT: retq # sched: [1:1.00] 934 ; 935 ; SKX-LABEL: test_broadcast_vaddpd: 936 ; SKX: # %bb.0: 937 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50] 938 ; SKX-NEXT: retq # sched: [7:1.00] 939 %tmp = load double, double* %j 940 %b = insertelement <8 x double> undef, double %tmp, i32 0 941 %c = shufflevector <8 x double> %b, <8 x double> undef, 942 <8 x i32> zeroinitializer 943 %x = fadd <8 x double> %c, %i 944 ret <8 x double> %x 945 } 946 947 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { 948 ; GENERIC-LABEL: test_mask_broadcast_vaddpd: 949 ; GENERIC: # %bb.0: 950 ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 951 ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00] 952 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 953 ; GENERIC-NEXT: retq # sched: [1:1.00] 954 ; 955 ; SKX-LABEL: test_mask_broadcast_vaddpd: 956 ; SKX: # %bb.0: 957 ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 958 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] 959 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 960 ; SKX-NEXT: retq # sched: [7:1.00] 961 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 962 %tmp = load double, double* %j 963 %b = insertelement <8 x double> undef, double %tmp, i32 0 964 %c = shufflevector <8 x double> %b, <8 x double> undef, 965 <8 x i32> zeroinitializer 966 %x = fadd <8 x double> %c, %i 967 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i 968 ret <8 x double> %r 969 } 970 971 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, 972 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: 973 ; GENERIC: # %bb.0: 974 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 975 ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] 976 ; GENERIC-NEXT: retq # sched: [1:1.00] 977 ; 978 ; SKX-LABEL: 
test_maskz_broadcast_vaddpd: 979 ; SKX: # %bb.0: 980 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 981 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] 982 ; SKX-NEXT: retq # sched: [7:1.00] 983 <8 x i64> %mask1) nounwind { 984 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 985 %tmp = load double, double* %j 986 %b = insertelement <8 x double> undef, double %tmp, i32 0 987 %c = shufflevector <8 x double> %b, <8 x double> undef, 988 <8 x i32> zeroinitializer 989 %x = fadd <8 x double> %c, %i 990 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer 991 ret <8 x double> %r 992 } 993 994 define <16 x float> @test_fxor(<16 x float> %a) { 995 ; GENERIC-LABEL: test_fxor: 996 ; GENERIC: # %bb.0: 997 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 998 ; GENERIC-NEXT: retq # sched: [1:1.00] 999 ; 1000 ; SKX-LABEL: test_fxor: 1001 ; SKX: # %bb.0: 1002 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 1003 ; SKX-NEXT: retq # sched: [7:1.00] 1004 1005 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1006 ret <16 x float>%res 1007 } 1008 1009 define <8 x float> @test_fxor_8f32(<8 x float> %a) { 1010 ; GENERIC-LABEL: test_fxor_8f32: 1011 ; GENERIC: # %bb.0: 1012 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] 1013 ; GENERIC-NEXT: retq # sched: [1:1.00] 1014 ; 1015 ; SKX-LABEL: test_fxor_8f32: 1016 ; SKX: # %bb.0: 1017 ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] 1018 ; SKX-NEXT: retq # sched: [7:1.00] 1019 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1020 ret <8 x float>%res 1021 } 1022 1023 define <8 x double> @fabs_v8f64(<8 x double> %p) 1024 ; GENERIC-LABEL: fabs_v8f64: 1025 ; GENERIC: # %bb.0: 1026 ; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] 1027 ; GENERIC-NEXT: retq # sched: [1:1.00] 1028 ; 1029 ; SKX-LABEL: fabs_v8f64: 1030 ; SKX: # %bb.0: 1031 ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 1032 ; SKX-NEXT: retq # sched: [7:1.00] 1033 { 1034 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1035 ret <8 x double> %t 1036 } 1037 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1038 1039 define <16 x float> @fabs_v16f32(<16 x float> %p) 1040 ; GENERIC-LABEL: fabs_v16f32: 1041 ; GENERIC: # %bb.0: 1042 ; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 1043 ; GENERIC-NEXT: retq # sched: [1:1.00] 1044 ; 1045 ; SKX-LABEL: fabs_v16f32: 1046 ; SKX: # %bb.0: 1047 ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 1048 ; SKX-NEXT: retq # sched: [7:1.00] 1049 { 1050 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1051 ret <16 x float> %t 1052 } 1053 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1054 1055 define double @test1(double %a, double %b) nounwind { 1056 ; GENERIC-LABEL: test1: 1057 ; GENERIC: # %bb.0: 1058 ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1059 ; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00] 1060 ; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00] 1061 ; GENERIC-NEXT: .LBB64_1: # %l1 1062 ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1063 ; GENERIC-NEXT: retq # sched: [1:1.00] 1064 ; GENERIC-NEXT: .LBB64_2: # %l2 1065 ; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1066 ; GENERIC-NEXT: retq # sched: [1:1.00] 1067 ; 1068 ; SKX-LABEL: test1: 1069 ; SKX: # %bb.0: 1070 ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1071 ; SKX-NEXT: jne 
.LBB64_1 # sched: [1:0.50] 1072 ; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] 1073 ; SKX-NEXT: .LBB64_1: # %l1 1074 ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1075 ; SKX-NEXT: retq # sched: [7:1.00] 1076 ; SKX-NEXT: .LBB64_2: # %l2 1077 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1078 ; SKX-NEXT: retq # sched: [7:1.00] 1079 %tobool = fcmp une double %a, %b 1080 br i1 %tobool, label %l1, label %l2 1081 1082 l1: 1083 %c = fsub double %a, %b 1084 ret double %c 1085 l2: 1086 %c1 = fadd double %a, %b 1087 ret double %c1 1088 } 1089 1090 define float @test2(float %a, float %b) nounwind { 1091 ; GENERIC-LABEL: test2: 1092 ; GENERIC: # %bb.0: 1093 ; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] 1094 ; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00] 1095 ; GENERIC-NEXT: # %bb.1: # %l1 1096 ; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1097 ; GENERIC-NEXT: retq # sched: [1:1.00] 1098 ; GENERIC-NEXT: .LBB65_2: # %l2 1099 ; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1100 ; GENERIC-NEXT: retq # sched: [1:1.00] 1101 ; 1102 ; SKX-LABEL: test2: 1103 ; SKX: # %bb.0: 1104 ; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] 1105 ; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] 1106 ; SKX-NEXT: # %bb.1: # %l1 1107 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1108 ; SKX-NEXT: retq # sched: [7:1.00] 1109 ; SKX-NEXT: .LBB65_2: # %l2 1110 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1111 ; SKX-NEXT: retq # sched: [7:1.00] 1112 %tobool = fcmp olt float %a, %b 1113 br i1 %tobool, label %l1, label %l2 1114 1115 l1: 1116 %c = fsub float %a, %b 1117 ret float %c 1118 l2: 1119 %c1 = fadd float %a, %b 1120 ret float %c1 1121 } 1122 1123 define i32 @test3(float %a, float %b) { 1124 ; GENERIC-LABEL: test3: 1125 ; GENERIC: # %bb.0: 1126 ; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] 1127 ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] 1128 ; GENERIC-NEXT: retq # sched: [1:1.00] 1129 ; 1130 ; SKX-LABEL: 
test3: 1131 ; SKX: # %bb.0: 1132 ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] 1133 ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] 1134 ; SKX-NEXT: retq # sched: [7:1.00] 1135 1136 %cmp10.i = fcmp oeq float %a, %b 1137 %conv11.i = zext i1 %cmp10.i to i32 1138 ret i32 %conv11.i 1139 } 1140 1141 define float @test5(float %p) #0 { 1142 ; GENERIC-LABEL: test5: 1143 ; GENERIC: # %bb.0: # %entry 1144 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 1145 ; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 1146 ; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00] 1147 ; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00] 1148 ; GENERIC-NEXT: # %bb.2: # %return 1149 ; GENERIC-NEXT: retq # sched: [1:1.00] 1150 ; GENERIC-NEXT: .LBB67_1: # %if.end 1151 ; GENERIC-NEXT: seta %al # sched: [2:1.00] 1152 ; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33] 1153 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 1154 ; GENERIC-NEXT: retq # sched: [1:1.00] 1155 ; 1156 ; SKX-LABEL: test5: 1157 ; SKX: # %bb.0: # %entry 1158 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 1159 ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 1160 ; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50] 1161 ; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50] 1162 ; SKX-NEXT: # %bb.2: # %return 1163 ; SKX-NEXT: retq # sched: [7:1.00] 1164 ; SKX-NEXT: .LBB67_1: # %if.end 1165 ; SKX-NEXT: seta %al # sched: [2:1.00] 1166 ; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25] 1167 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 1168 ; SKX-NEXT: retq # sched: [7:1.00] 1169 entry: 1170 %cmp = fcmp oeq float %p, 0.000000e+00 1171 br i1 %cmp, label %return, label %if.end 1172 1173 if.end: ; preds = %entry 1174 %cmp1 = fcmp ogt float %p, 0.000000e+00 1175 %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00 1176 br label %return 1177 1178 return: ; preds = %if.end, %entry 1179 %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ] 1180 ret float %retval.0 
1181 } 1182 1183 define i32 @test6(i32 %a, i32 %b) { 1184 ; GENERIC-LABEL: test6: 1185 ; GENERIC: # %bb.0: 1186 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 1187 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 1188 ; GENERIC-NEXT: sete %al # sched: [1:0.50] 1189 ; GENERIC-NEXT: retq # sched: [1:1.00] 1190 ; 1191 ; SKX-LABEL: test6: 1192 ; SKX: # %bb.0: 1193 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 1194 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 1195 ; SKX-NEXT: sete %al # sched: [1:0.50] 1196 ; SKX-NEXT: retq # sched: [7:1.00] 1197 %cmp = icmp eq i32 %a, %b 1198 %res = zext i1 %cmp to i32 1199 ret i32 %res 1200 } 1201 1202 define i32 @test7(double %x, double %y) #2 { 1203 ; GENERIC-LABEL: test7: 1204 ; GENERIC: # %bb.0: # %entry 1205 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 1206 ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1207 ; GENERIC-NEXT: setne %al # sched: [1:0.50] 1208 ; GENERIC-NEXT: retq # sched: [1:1.00] 1209 ; 1210 ; SKX-LABEL: test7: 1211 ; SKX: # %bb.0: # %entry 1212 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 1213 ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1214 ; SKX-NEXT: setne %al # sched: [1:0.50] 1215 ; SKX-NEXT: retq # sched: [7:1.00] 1216 entry: 1217 %0 = fcmp one double %x, %y 1218 %or = zext i1 %0 to i32 1219 ret i32 %or 1220 } 1221 1222 define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { 1223 ; GENERIC-LABEL: test8: 1224 ; GENERIC: # %bb.0: 1225 ; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 1226 ; GENERIC-NEXT: # sched: [1:0.33] 1227 ; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33] 1228 ; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33] 1229 ; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67] 1230 ; GENERIC-NEXT: notl %edi # sched: [1:0.33] 1231 ; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33] 1232 ; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67] 1233 ; GENERIC-NEXT: retq # sched: [1:1.00] 1234 ; 1235 ; SKX-LABEL: test8: 1236 ; SKX: # %bb.0: 1237 ; SKX-NEXT: notl %edi # sched: [1:0.25] 
1238 ; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 1239 ; SKX-NEXT: # sched: [1:0.25] 1240 ; SKX-NEXT: testl %edx, %edx # sched: [1:0.25] 1241 ; SKX-NEXT: movl $1, %eax # sched: [1:0.25] 1242 ; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50] 1243 ; SKX-NEXT: orl %edi, %esi # sched: [1:0.25] 1244 ; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50] 1245 ; SKX-NEXT: retq # sched: [7:1.00] 1246 %tmp1 = icmp eq i32 %a1, -1 1247 %tmp2 = icmp eq i32 %a2, -2147483648 1248 %tmp3 = and i1 %tmp1, %tmp2 1249 %tmp4 = icmp eq i32 %a3, 0 1250 %tmp5 = or i1 %tmp3, %tmp4 1251 %res = select i1 %tmp5, i32 1, i32 %a3 1252 ret i32 %res 1253 } 1254 1255 define i32 @test9(i64 %a) { 1256 ; GENERIC-LABEL: test9: 1257 ; GENERIC: # %bb.0: 1258 ; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33] 1259 ; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00] 1260 ; GENERIC-NEXT: # %bb.1: # %A 1261 ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] 1262 ; GENERIC-NEXT: retq # sched: [1:1.00] 1263 ; GENERIC-NEXT: .LBB71_2: # %B 1264 ; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] 1265 ; GENERIC-NEXT: retq # sched: [1:1.00] 1266 ; 1267 ; SKX-LABEL: test9: 1268 ; SKX: # %bb.0: 1269 ; SKX-NEXT: testb $1, %dil # sched: [1:0.25] 1270 ; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50] 1271 ; SKX-NEXT: # %bb.1: # %A 1272 ; SKX-NEXT: movl $6, %eax # sched: [1:0.25] 1273 ; SKX-NEXT: retq # sched: [7:1.00] 1274 ; SKX-NEXT: .LBB71_2: # %B 1275 ; SKX-NEXT: movl $7, %eax # sched: [1:0.25] 1276 ; SKX-NEXT: retq # sched: [7:1.00] 1277 %b = and i64 %a, 1 1278 %cmp10.i = icmp eq i64 %b, 0 1279 br i1 %cmp10.i, label %A, label %B 1280 A: 1281 ret i32 6 1282 B: 1283 ret i32 7 1284 } 1285 1286 define i32 @test10(i64 %b, i64 %c, i1 %d) { 1287 ; GENERIC-LABEL: test10: 1288 ; GENERIC: # %bb.0: 1289 ; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33] 1290 ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] 1291 ; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33] 1292 ; GENERIC-NEXT: sete %cl # sched: [1:0.50] 1293 ; GENERIC-NEXT: orb %dl, %cl # 
sched: [1:0.33] 1294 ; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33] 1295 ; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33] 1296 ; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00] 1297 ; GENERIC-NEXT: # %bb.2: # %if.end.i 1298 ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] 1299 ; GENERIC-NEXT: retq # sched: [1:1.00] 1300 ; GENERIC-NEXT: .LBB72_1: # %if.then.i 1301 ; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33] 1302 ; GENERIC-NEXT: retq # sched: [1:1.00] 1303 ; 1304 ; SKX-LABEL: test10: 1305 ; SKX: # %bb.0: 1306 ; SKX-NEXT: movl %edx, %eax # sched: [1:0.25] 1307 ; SKX-NEXT: andb $1, %al # sched: [1:0.25] 1308 ; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25] 1309 ; SKX-NEXT: sete %cl # sched: [1:0.50] 1310 ; SKX-NEXT: orb %dl, %cl # sched: [1:0.25] 1311 ; SKX-NEXT: andb $1, %cl # sched: [1:0.25] 1312 ; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25] 1313 ; SKX-NEXT: je .LBB72_1 # sched: [1:0.50] 1314 ; SKX-NEXT: # %bb.2: # %if.end.i 1315 ; SKX-NEXT: movl $6, %eax # sched: [1:0.25] 1316 ; SKX-NEXT: retq # sched: [7:1.00] 1317 ; SKX-NEXT: .LBB72_1: # %if.then.i 1318 ; SKX-NEXT: movl $5, %eax # sched: [1:0.25] 1319 ; SKX-NEXT: retq # sched: [7:1.00] 1320 1321 %cmp8.i = icmp eq i64 %b, %c 1322 %or1 = or i1 %d, %cmp8.i 1323 %xor1 = xor i1 %d, %or1 1324 br i1 %xor1, label %if.end.i, label %if.then.i 1325 1326 if.then.i: 1327 ret i32 5 1328 1329 if.end.i: 1330 ret i32 6 1331 } 1332 1333 define <16 x float> @sitof32(<16 x i32> %a) nounwind { 1334 ; GENERIC-LABEL: sitof32: 1335 ; GENERIC: # %bb.0: 1336 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 1337 ; GENERIC-NEXT: retq # sched: [1:1.00] 1338 ; 1339 ; SKX-LABEL: sitof32: 1340 ; SKX: # %bb.0: 1341 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 1342 ; SKX-NEXT: retq # sched: [7:1.00] 1343 %b = sitofp <16 x i32> %a to <16 x float> 1344 ret <16 x float> %b 1345 } 1346 1347 define <8 x double> @sltof864(<8 x i64> %a) { 1348 ; GENERIC-LABEL: sltof864: 1349 ; GENERIC: # %bb.0: 1350 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # 
sched: [4:1.00] 1351 ; GENERIC-NEXT: retq # sched: [1:1.00] 1352 ; 1353 ; SKX-LABEL: sltof864: 1354 ; SKX: # %bb.0: 1355 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1356 ; SKX-NEXT: retq # sched: [7:1.00] 1357 %b = sitofp <8 x i64> %a to <8 x double> 1358 ret <8 x double> %b 1359 } 1360 1361 define <4 x double> @slto4f64(<4 x i64> %a) { 1362 ; GENERIC-LABEL: slto4f64: 1363 ; GENERIC: # %bb.0: 1364 ; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00] 1365 ; GENERIC-NEXT: retq # sched: [1:1.00] 1366 ; 1367 ; SKX-LABEL: slto4f64: 1368 ; SKX: # %bb.0: 1369 ; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] 1370 ; SKX-NEXT: retq # sched: [7:1.00] 1371 %b = sitofp <4 x i64> %a to <4 x double> 1372 ret <4 x double> %b 1373 } 1374 1375 define <2 x double> @slto2f64(<2 x i64> %a) { 1376 ; GENERIC-LABEL: slto2f64: 1377 ; GENERIC: # %bb.0: 1378 ; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00] 1379 ; GENERIC-NEXT: retq # sched: [1:1.00] 1380 ; 1381 ; SKX-LABEL: slto2f64: 1382 ; SKX: # %bb.0: 1383 ; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] 1384 ; SKX-NEXT: retq # sched: [7:1.00] 1385 %b = sitofp <2 x i64> %a to <2 x double> 1386 ret <2 x double> %b 1387 } 1388 1389 define <2 x float> @sltof2f32(<2 x i64> %a) { 1390 ; GENERIC-LABEL: sltof2f32: 1391 ; GENERIC: # %bb.0: 1392 ; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00] 1393 ; GENERIC-NEXT: retq # sched: [1:1.00] 1394 ; 1395 ; SKX-LABEL: sltof2f32: 1396 ; SKX: # %bb.0: 1397 ; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00] 1398 ; SKX-NEXT: retq # sched: [7:1.00] 1399 %b = sitofp <2 x i64> %a to <2 x float> 1400 ret <2 x float>%b 1401 } 1402 1403 define <4 x float> @slto4f32_mem(<4 x i64>* %a) { 1404 ; GENERIC-LABEL: slto4f32_mem: 1405 ; GENERIC: # %bb.0: 1406 ; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00] 1407 ; GENERIC-NEXT: retq # sched: [1:1.00] 1408 ; 1409 ; SKX-LABEL: slto4f32_mem: 1410 ; SKX: # %bb.0: 1411 ; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50] 1412 
; SKX-NEXT: retq # sched: [7:1.00] 1413 %a1 = load <4 x i64>, <4 x i64>* %a, align 8 1414 %b = sitofp <4 x i64> %a1 to <4 x float> 1415 ret <4 x float>%b 1416 } 1417 1418 define <4 x i64> @f64to4sl(<4 x double> %a) { 1419 ; GENERIC-LABEL: f64to4sl: 1420 ; GENERIC: # %bb.0: 1421 ; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00] 1422 ; GENERIC-NEXT: retq # sched: [1:1.00] 1423 ; 1424 ; SKX-LABEL: f64to4sl: 1425 ; SKX: # %bb.0: 1426 ; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] 1427 ; SKX-NEXT: retq # sched: [7:1.00] 1428 %b = fptosi <4 x double> %a to <4 x i64> 1429 ret <4 x i64> %b 1430 } 1431 1432 define <4 x i64> @f32to4sl(<4 x float> %a) { 1433 ; GENERIC-LABEL: f32to4sl: 1434 ; GENERIC: # %bb.0: 1435 ; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00] 1436 ; GENERIC-NEXT: retq # sched: [1:1.00] 1437 ; 1438 ; SKX-LABEL: f32to4sl: 1439 ; SKX: # %bb.0: 1440 ; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00] 1441 ; SKX-NEXT: retq # sched: [7:1.00] 1442 %b = fptosi <4 x float> %a to <4 x i64> 1443 ret <4 x i64> %b 1444 } 1445 1446 define <4 x float> @slto4f32(<4 x i64> %a) { 1447 ; GENERIC-LABEL: slto4f32: 1448 ; GENERIC: # %bb.0: 1449 ; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00] 1450 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1451 ; GENERIC-NEXT: retq # sched: [1:1.00] 1452 ; 1453 ; SKX-LABEL: slto4f32: 1454 ; SKX: # %bb.0: 1455 ; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] 1456 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1457 ; SKX-NEXT: retq # sched: [7:1.00] 1458 %b = sitofp <4 x i64> %a to <4 x float> 1459 ret <4 x float> %b 1460 } 1461 1462 define <4 x float> @ulto4f32(<4 x i64> %a) { 1463 ; GENERIC-LABEL: ulto4f32: 1464 ; GENERIC: # %bb.0: 1465 ; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00] 1466 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1467 ; GENERIC-NEXT: retq # sched: [1:1.00] 1468 ; 1469 ; SKX-LABEL: ulto4f32: 1470 ; SKX: # %bb.0: 1471 ; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] 1472 ; 
SKX-NEXT: vzeroupper # sched: [4:1.00] 1473 ; SKX-NEXT: retq # sched: [7:1.00] 1474 %b = uitofp <4 x i64> %a to <4 x float> 1475 ret <4 x float> %b 1476 } 1477 1478 define <8 x double> @ulto8f64(<8 x i64> %a) { 1479 ; GENERIC-LABEL: ulto8f64: 1480 ; GENERIC: # %bb.0: 1481 ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] 1482 ; GENERIC-NEXT: retq # sched: [1:1.00] 1483 ; 1484 ; SKX-LABEL: ulto8f64: 1485 ; SKX: # %bb.0: 1486 ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1487 ; SKX-NEXT: retq # sched: [7:1.00] 1488 %b = uitofp <8 x i64> %a to <8 x double> 1489 ret <8 x double> %b 1490 } 1491 1492 define <16 x double> @ulto16f64(<16 x i64> %a) { 1493 ; GENERIC-LABEL: ulto16f64: 1494 ; GENERIC: # %bb.0: 1495 ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] 1496 ; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00] 1497 ; GENERIC-NEXT: retq # sched: [1:1.00] 1498 ; 1499 ; SKX-LABEL: ulto16f64: 1500 ; SKX: # %bb.0: 1501 ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1502 ; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] 1503 ; SKX-NEXT: retq # sched: [7:1.00] 1504 %b = uitofp <16 x i64> %a to <16 x double> 1505 ret <16 x double> %b 1506 } 1507 1508 define <16 x i32> @f64to16si(<16 x float> %a) nounwind { 1509 ; GENERIC-LABEL: f64to16si: 1510 ; GENERIC: # %bb.0: 1511 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1512 ; GENERIC-NEXT: retq # sched: [1:1.00] 1513 ; 1514 ; SKX-LABEL: f64to16si: 1515 ; SKX: # %bb.0: 1516 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1517 ; SKX-NEXT: retq # sched: [7:1.00] 1518 %b = fptosi <16 x float> %a to <16 x i32> 1519 ret <16 x i32> %b 1520 } 1521 1522 define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { 1523 ; GENERIC-LABEL: f32to16ui: 1524 ; GENERIC: # %bb.0: 1525 ; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00] 1526 ; GENERIC-NEXT: retq # sched: [1:1.00] 1527 ; 1528 ; SKX-LABEL: f32to16ui: 1529 ; SKX: # %bb.0: 1530 ; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: 
[4:0.50] 1531 ; SKX-NEXT: retq # sched: [7:1.00] 1532 %b = fptoui <16 x float> %a to <16 x i32> 1533 ret <16 x i32> %b 1534 } 1535 1536 define <16 x i8> @f32to16uc(<16 x float> %f) { 1537 ; GENERIC-LABEL: f32to16uc: 1538 ; GENERIC: # %bb.0: 1539 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1540 ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] 1541 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1542 ; GENERIC-NEXT: retq # sched: [1:1.00] 1543 ; 1544 ; SKX-LABEL: f32to16uc: 1545 ; SKX: # %bb.0: 1546 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1547 ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] 1548 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1549 ; SKX-NEXT: retq # sched: [7:1.00] 1550 %res = fptoui <16 x float> %f to <16 x i8> 1551 ret <16 x i8> %res 1552 } 1553 1554 define <16 x i16> @f32to16us(<16 x float> %f) { 1555 ; GENERIC-LABEL: f32to16us: 1556 ; GENERIC: # %bb.0: 1557 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1558 ; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] 1559 ; GENERIC-NEXT: retq # sched: [1:1.00] 1560 ; 1561 ; SKX-LABEL: f32to16us: 1562 ; SKX: # %bb.0: 1563 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1564 ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] 1565 ; SKX-NEXT: retq # sched: [7:1.00] 1566 %res = fptoui <16 x float> %f to <16 x i16> 1567 ret <16 x i16> %res 1568 } 1569 1570 define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { 1571 ; GENERIC-LABEL: f32to8ui: 1572 ; GENERIC: # %bb.0: 1573 ; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00] 1574 ; GENERIC-NEXT: retq # sched: [1:1.00] 1575 ; 1576 ; SKX-LABEL: f32to8ui: 1577 ; SKX: # %bb.0: 1578 ; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] 1579 ; SKX-NEXT: retq # sched: [7:1.00] 1580 %b = fptoui <8 x float> %a to <8 x i32> 1581 ret <8 x i32> %b 1582 } 1583 1584 define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { 1585 ; GENERIC-LABEL: f32to4ui: 1586 ; GENERIC: # %bb.0: 1587 ; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 
# sched: [3:1.00] 1588 ; GENERIC-NEXT: retq # sched: [1:1.00] 1589 ; 1590 ; SKX-LABEL: f32to4ui: 1591 ; SKX: # %bb.0: 1592 ; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] 1593 ; SKX-NEXT: retq # sched: [7:1.00] 1594 %b = fptoui <4 x float> %a to <4 x i32> 1595 ret <4 x i32> %b 1596 } 1597 1598 define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { 1599 ; GENERIC-LABEL: f64to8ui: 1600 ; GENERIC: # %bb.0: 1601 ; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00] 1602 ; GENERIC-NEXT: retq # sched: [1:1.00] 1603 ; 1604 ; SKX-LABEL: f64to8ui: 1605 ; SKX: # %bb.0: 1606 ; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00] 1607 ; SKX-NEXT: retq # sched: [7:1.00] 1608 %b = fptoui <8 x double> %a to <8 x i32> 1609 ret <8 x i32> %b 1610 } 1611 1612 define <8 x i16> @f64to8us(<8 x double> %f) { 1613 ; GENERIC-LABEL: f64to8us: 1614 ; GENERIC: # %bb.0: 1615 ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] 1616 ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] 1617 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1618 ; GENERIC-NEXT: retq # sched: [1:1.00] 1619 ; 1620 ; SKX-LABEL: f64to8us: 1621 ; SKX: # %bb.0: 1622 ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] 1623 ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] 1624 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1625 ; SKX-NEXT: retq # sched: [7:1.00] 1626 %res = fptoui <8 x double> %f to <8 x i16> 1627 ret <8 x i16> %res 1628 } 1629 1630 define <8 x i8> @f64to8uc(<8 x double> %f) { 1631 ; GENERIC-LABEL: f64to8uc: 1632 ; GENERIC: # %bb.0: 1633 ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] 1634 ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] 1635 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1636 ; GENERIC-NEXT: retq # sched: [1:1.00] 1637 ; 1638 ; SKX-LABEL: f64to8uc: 1639 ; SKX: # %bb.0: 1640 ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] 1641 ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] 1642 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1643 ; SKX-NEXT: retq # sched: 
[7:1.00]
  %res = fptoui <8 x double> %f to <8 x i8>
  ret <8 x i8> %res
}

; Unsigned truncating conversion of <4 x double> to <4 x i32> (fptoui).
define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
; GENERIC-LABEL: f64to4ui:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4ui:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; Signed conversion of <8 x i32> to <8 x double> (sitofp), unmasked.
define <8 x double> @sito8f64(<8 x i32> %a) {
; GENERIC-LABEL: sito8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sito8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %b
}

; Merge-masked signed conversion of <8 x i32> to <8 x double>: the i8 argument
; %c is reinterpreted as an <8 x i1> lane mask, and lanes whose mask bit is
; clear keep the corresponding element of %a.
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: i32to8f64_mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: i32to8f64_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %1 = bitcast i8 %c to <8 x i1>
  %2 = sitofp <8 x i32> %b to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
  ret <8 x double> %3
}

; Zero-masked signed conversion of <8 x i32> to <8 x double>: the i8 argument
; %b is reinterpreted as an <8 x i1> lane mask, and lanes whose mask bit is
; clear are set to zero.
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: sito8f64_maskz:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sito8f64_maskz:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %1 = bitcast i8 %b to <8 x i1>
  %2 = sitofp <8 x i32> %a to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %3
}

; Signed truncating conversion of <8 x double> to <8 x i32> (fptosi).
define <8 x i32> @f64to8si(<8 x double> %a) {
; GENERIC-LABEL: f64to8si:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to8si:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = fptosi <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

; Signed truncating conversion of <4 x double> to <4 x i32> (fptosi).
define <4 x i32> @f64to4si(<4 x double> %a) {
; GENERIC-LABEL: f64to4si:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4si:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; Floating-point truncation of <16 x double> to <16 x float> (fptrunc).
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; GENERIC-LABEL: f64to16f32:
1754 ; GENERIC: # %bb.0: 1755 ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] 1756 ; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00] 1757 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 1758 ; GENERIC-NEXT: retq # sched: [1:1.00] 1759 ; 1760 ; SKX-LABEL: f64to16f32: 1761 ; SKX: # %bb.0: 1762 ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] 1763 ; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00] 1764 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 1765 ; SKX-NEXT: retq # sched: [7:1.00] 1766 %a = fptrunc <16 x double> %b to <16 x float> 1767 ret <16 x float> %a 1768 } 1769 1770 define <4 x float> @f64to4f32(<4 x double> %b) { 1771 ; GENERIC-LABEL: f64to4f32: 1772 ; GENERIC: # %bb.0: 1773 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] 1774 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1775 ; GENERIC-NEXT: retq # sched: [1:1.00] 1776 ; 1777 ; SKX-LABEL: f64to4f32: 1778 ; SKX: # %bb.0: 1779 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] 1780 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1781 ; SKX-NEXT: retq # sched: [7:1.00] 1782 %a = fptrunc <4 x double> %b to <4 x float> 1783 ret <4 x float> %a 1784 } 1785 1786 define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { 1787 ; GENERIC-LABEL: f64to4f32_mask: 1788 ; GENERIC: # %bb.0: 1789 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] 1790 ; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] 1791 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00] 1792 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1793 ; GENERIC-NEXT: retq # sched: [1:1.00] 1794 ; 1795 ; SKX-LABEL: f64to4f32_mask: 1796 ; SKX: # %bb.0: 1797 ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] 1798 ; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] 1799 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] 1800 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1801 ; SKX-NEXT: retq # sched: [7:1.00] 1802 %a = fptrunc <4 x double> %b to <4 
x float> 1803 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer 1804 ret <4 x float> %c 1805 } 1806 1807 define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 1808 ; GENERIC-LABEL: f64tof32_inreg: 1809 ; GENERIC: # %bb.0: 1810 ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00] 1811 ; GENERIC-NEXT: retq # sched: [1:1.00] 1812 ; 1813 ; SKX-LABEL: f64tof32_inreg: 1814 ; SKX: # %bb.0: 1815 ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] 1816 ; SKX-NEXT: retq # sched: [7:1.00] 1817 %ext = extractelement <2 x double> %a0, i32 0 1818 %cvt = fptrunc double %ext to float 1819 %res = insertelement <4 x float> %a1, float %cvt, i32 0 1820 ret <4 x float> %res 1821 } 1822 1823 define <8 x double> @f32to8f64(<8 x float> %b) nounwind { 1824 ; GENERIC-LABEL: f32to8f64: 1825 ; GENERIC: # %bb.0: 1826 ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] 1827 ; GENERIC-NEXT: retq # sched: [1:1.00] 1828 ; 1829 ; SKX-LABEL: f32to8f64: 1830 ; SKX: # %bb.0: 1831 ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] 1832 ; SKX-NEXT: retq # sched: [7:1.00] 1833 %a = fpext <8 x float> %b to <8 x double> 1834 ret <8 x double> %a 1835 } 1836 1837 define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { 1838 ; GENERIC-LABEL: f32to4f64_mask: 1839 ; GENERIC: # %bb.0: 1840 ; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] 1841 ; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00] 1842 ; GENERIC-NEXT: retq # sched: [1:1.00] 1843 ; 1844 ; SKX-LABEL: f32to4f64_mask: 1845 ; SKX: # %bb.0: 1846 ; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] 1847 ; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00] 1848 ; SKX-NEXT: retq # sched: [7:1.00] 1849 %a = fpext <4 x float> %b to <4 x double> 1850 %mask = fcmp ogt <4 x double> %a1, %b1 1851 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer 1852 ret <4 x double> %c 1853 } 1854 1855 define <2 
x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 1856 ; GENERIC-LABEL: f32tof64_inreg: 1857 ; GENERIC: # %bb.0: 1858 ; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1859 ; GENERIC-NEXT: retq # sched: [1:1.00] 1860 ; 1861 ; SKX-LABEL: f32tof64_inreg: 1862 ; SKX: # %bb.0: 1863 ; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 1864 ; SKX-NEXT: retq # sched: [7:1.00] 1865 %ext = extractelement <4 x float> %a1, i32 0 1866 %cvt = fpext float %ext to double 1867 %res = insertelement <2 x double> %a0, double %cvt, i32 0 1868 ret <2 x double> %res 1869 } 1870 1871 define double @sltof64_load(i64* nocapture %e) { 1872 ; GENERIC-LABEL: sltof64_load: 1873 ; GENERIC: # %bb.0: # %entry 1874 ; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1875 ; GENERIC-NEXT: retq # sched: [1:1.00] 1876 ; 1877 ; SKX-LABEL: sltof64_load: 1878 ; SKX: # %bb.0: # %entry 1879 ; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1880 ; SKX-NEXT: retq # sched: [7:1.00] 1881 entry: 1882 %tmp1 = load i64, i64* %e, align 8 1883 %conv = sitofp i64 %tmp1 to double 1884 ret double %conv 1885 } 1886 1887 define double @sitof64_load(i32* %e) { 1888 ; GENERIC-LABEL: sitof64_load: 1889 ; GENERIC: # %bb.0: # %entry 1890 ; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1891 ; GENERIC-NEXT: retq # sched: [1:1.00] 1892 ; 1893 ; SKX-LABEL: sitof64_load: 1894 ; SKX: # %bb.0: # %entry 1895 ; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1896 ; SKX-NEXT: retq # sched: [7:1.00] 1897 entry: 1898 %tmp1 = load i32, i32* %e, align 4 1899 %conv = sitofp i32 %tmp1 to double 1900 ret double %conv 1901 } 1902 1903 define float @sitof32_load(i32* %e) { 1904 ; GENERIC-LABEL: sitof32_load: 1905 ; GENERIC: # %bb.0: # %entry 1906 ; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 1907 ; GENERIC-NEXT: retq # sched: [1:1.00] 1908 ; 1909 ; SKX-LABEL: sitof32_load: 1910 ; SKX: # %bb.0: # %entry 1911 ; SKX-NEXT: 
vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1912 ; SKX-NEXT: retq # sched: [7:1.00] 1913 entry: 1914 %tmp1 = load i32, i32* %e, align 4 1915 %conv = sitofp i32 %tmp1 to float 1916 ret float %conv 1917 } 1918 1919 define float @sltof32_load(i64* %e) { 1920 ; GENERIC-LABEL: sltof32_load: 1921 ; GENERIC: # %bb.0: # %entry 1922 ; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 1923 ; GENERIC-NEXT: retq # sched: [1:1.00] 1924 ; 1925 ; SKX-LABEL: sltof32_load: 1926 ; SKX: # %bb.0: # %entry 1927 ; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1928 ; SKX-NEXT: retq # sched: [7:1.00] 1929 entry: 1930 %tmp1 = load i64, i64* %e, align 8 1931 %conv = sitofp i64 %tmp1 to float 1932 ret float %conv 1933 } 1934 1935 define void @f32tof64_loadstore() { 1936 ; GENERIC-LABEL: f32tof64_loadstore: 1937 ; GENERIC: # %bb.0: # %entry 1938 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 1939 ; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] 1940 ; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1941 ; GENERIC-NEXT: retq # sched: [1:1.00] 1942 ; 1943 ; SKX-LABEL: f32tof64_loadstore: 1944 ; SKX: # %bb.0: # %entry 1945 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 1946 ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 1947 ; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1948 ; SKX-NEXT: retq # sched: [7:1.00] 1949 entry: 1950 %f = alloca float, align 4 1951 %d = alloca double, align 8 1952 %tmp = load float, float* %f, align 4 1953 %conv = fpext float %tmp to double 1954 store double %conv, double* %d, align 8 1955 ret void 1956 } 1957 1958 define void @f64tof32_loadstore() nounwind uwtable { 1959 ; GENERIC-LABEL: f64tof32_loadstore: 1960 ; GENERIC: # %bb.0: # %entry 1961 ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 1962 ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 1963 ; GENERIC-NEXT: vmovss %xmm0, 
-{{[0-9]+}}(%rsp) # sched: [1:1.00] 1964 ; GENERIC-NEXT: retq # sched: [1:1.00] 1965 ; 1966 ; SKX-LABEL: f64tof32_loadstore: 1967 ; SKX: # %bb.0: # %entry 1968 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 1969 ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 1970 ; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1971 ; SKX-NEXT: retq # sched: [7:1.00] 1972 entry: 1973 %f = alloca float, align 4 1974 %d = alloca double, align 8 1975 %tmp = load double, double* %d, align 8 1976 %conv = fptrunc double %tmp to float 1977 store float %conv, float* %f, align 4 1978 ret void 1979 } 1980 1981 define double @long_to_double(i64 %x) { 1982 ; GENERIC-LABEL: long_to_double: 1983 ; GENERIC: # %bb.0: 1984 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 1985 ; GENERIC-NEXT: retq # sched: [1:1.00] 1986 ; 1987 ; SKX-LABEL: long_to_double: 1988 ; SKX: # %bb.0: 1989 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 1990 ; SKX-NEXT: retq # sched: [7:1.00] 1991 %res = bitcast i64 %x to double 1992 ret double %res 1993 } 1994 1995 define i64 @double_to_long(double %x) { 1996 ; GENERIC-LABEL: double_to_long: 1997 ; GENERIC: # %bb.0: 1998 ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 1999 ; GENERIC-NEXT: retq # sched: [1:1.00] 2000 ; 2001 ; SKX-LABEL: double_to_long: 2002 ; SKX: # %bb.0: 2003 ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 2004 ; SKX-NEXT: retq # sched: [7:1.00] 2005 %res = bitcast double %x to i64 2006 ret i64 %res 2007 } 2008 2009 define float @int_to_float(i32 %x) { 2010 ; GENERIC-LABEL: int_to_float: 2011 ; GENERIC: # %bb.0: 2012 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 2013 ; GENERIC-NEXT: retq # sched: [1:1.00] 2014 ; 2015 ; SKX-LABEL: int_to_float: 2016 ; SKX: # %bb.0: 2017 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 2018 ; SKX-NEXT: retq # sched: [7:1.00] 2019 %res = bitcast i32 %x to float 2020 ret float %res 2021 } 2022 2023 define i32 @float_to_int(float %x) { 2024 ; GENERIC-LABEL: float_to_int: 
2025 ; GENERIC: # %bb.0: 2026 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 2027 ; GENERIC-NEXT: retq # sched: [1:1.00] 2028 ; 2029 ; SKX-LABEL: float_to_int: 2030 ; SKX: # %bb.0: 2031 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 2032 ; SKX-NEXT: retq # sched: [7:1.00] 2033 %res = bitcast float %x to i32 2034 ret i32 %res 2035 } 2036 2037 define <16 x double> @uito16f64(<16 x i32> %a) nounwind { 2038 ; GENERIC-LABEL: uito16f64: 2039 ; GENERIC: # %bb.0: 2040 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00] 2041 ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] 2042 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00] 2043 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 2044 ; GENERIC-NEXT: retq # sched: [1:1.00] 2045 ; 2046 ; SKX-LABEL: uito16f64: 2047 ; SKX: # %bb.0: 2048 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00] 2049 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] 2050 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00] 2051 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 2052 ; SKX-NEXT: retq # sched: [7:1.00] 2053 %b = uitofp <16 x i32> %a to <16 x double> 2054 ret <16 x double> %b 2055 } 2056 2057 define <8 x float> @slto8f32(<8 x i64> %a) { 2058 ; GENERIC-LABEL: slto8f32: 2059 ; GENERIC: # %bb.0: 2060 ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2061 ; GENERIC-NEXT: retq # sched: [1:1.00] 2062 ; 2063 ; SKX-LABEL: slto8f32: 2064 ; SKX: # %bb.0: 2065 ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2066 ; SKX-NEXT: retq # sched: [7:1.00] 2067 %b = sitofp <8 x i64> %a to <8 x float> 2068 ret <8 x float> %b 2069 } 2070 2071 define <16 x float> @slto16f32(<16 x i64> %a) { 2072 ; GENERIC-LABEL: slto16f32: 2073 ; GENERIC: # %bb.0: 2074 ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2075 ; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00] 2076 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 2077 ; GENERIC-NEXT: retq # sched: [1:1.00] 2078 ; 
2079 ; SKX-LABEL: slto16f32: 2080 ; SKX: # %bb.0: 2081 ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2082 ; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00] 2083 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 2084 ; SKX-NEXT: retq # sched: [7:1.00] 2085 %b = sitofp <16 x i64> %a to <16 x float> 2086 ret <16 x float> %b 2087 } 2088 2089 define <8 x double> @slto8f64(<8 x i64> %a) { 2090 ; GENERIC-LABEL: slto8f64: 2091 ; GENERIC: # %bb.0: 2092 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] 2093 ; GENERIC-NEXT: retq # sched: [1:1.00] 2094 ; 2095 ; SKX-LABEL: slto8f64: 2096 ; SKX: # %bb.0: 2097 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 2098 ; SKX-NEXT: retq # sched: [7:1.00] 2099 %b = sitofp <8 x i64> %a to <8 x double> 2100 ret <8 x double> %b 2101 } 2102 2103 define <16 x double> @slto16f64(<16 x i64> %a) { 2104 ; GENERIC-LABEL: slto16f64: 2105 ; GENERIC: # %bb.0: 2106 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] 2107 ; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00] 2108 ; GENERIC-NEXT: retq # sched: [1:1.00] 2109 ; 2110 ; SKX-LABEL: slto16f64: 2111 ; SKX: # %bb.0: 2112 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 2113 ; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] 2114 ; SKX-NEXT: retq # sched: [7:1.00] 2115 %b = sitofp <16 x i64> %a to <16 x double> 2116 ret <16 x double> %b 2117 } 2118 2119 define <8 x float> @ulto8f32(<8 x i64> %a) { 2120 ; GENERIC-LABEL: ulto8f32: 2121 ; GENERIC: # %bb.0: 2122 ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2123 ; GENERIC-NEXT: retq # sched: [1:1.00] 2124 ; 2125 ; SKX-LABEL: ulto8f32: 2126 ; SKX: # %bb.0: 2127 ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2128 ; SKX-NEXT: retq # sched: [7:1.00] 2129 %b = uitofp <8 x i64> %a to <8 x float> 2130 ret <8 x float> %b 2131 } 2132 2133 define <16 x float> @ulto16f32(<16 x i64> %a) { 2134 ; GENERIC-LABEL: ulto16f32: 2135 ; GENERIC: # %bb.0: 2136 ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # 
sched: [3:1.00] 2137 ; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00] 2138 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 2139 ; GENERIC-NEXT: retq # sched: [1:1.00] 2140 ; 2141 ; SKX-LABEL: ulto16f32: 2142 ; SKX: # %bb.0: 2143 ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2144 ; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00] 2145 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 2146 ; SKX-NEXT: retq # sched: [7:1.00] 2147 %b = uitofp <16 x i64> %a to <16 x float> 2148 ret <16 x float> %b 2149 } 2150 2151 define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 2152 ; GENERIC-LABEL: uito8f64_mask: 2153 ; GENERIC: # %bb.0: 2154 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 2155 ; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] 2156 ; GENERIC-NEXT: retq # sched: [1:1.00] 2157 ; 2158 ; SKX-LABEL: uito8f64_mask: 2159 ; SKX: # %bb.0: 2160 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 2161 ; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] 2162 ; SKX-NEXT: retq # sched: [7:1.00] 2168 %1 = bitcast i8 %c to <8 x i1> 2169 %2 = uitofp <8 x i32> %b to <8 x double> 2170 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 2171 ret <8 x double> %3 2172 } 2173 define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 2174 ; GENERIC-LABEL: uito8f64_maskz: 2175 ; GENERIC: # %bb.0: 2176 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 2177 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] 2178 ; GENERIC-NEXT: retq # sched: [1:1.00] 2179 ; 2180 ; SKX-LABEL: uito8f64_maskz: 2181 ; SKX: # %bb.0: 2182 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 2183 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] 2184 ; SKX-NEXT: retq # sched: [7:1.00] 2185 %1 
= bitcast i8 %b to <8 x i1> 2186 %2 = uitofp <8 x i32> %a to <8 x double> 2187 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 2188 ret <8 x double> %3 2189 } 2190 2191 define <4 x double> @uito4f64(<4 x i32> %a) nounwind { 2192 ; GENERIC-LABEL: uito4f64: 2193 ; GENERIC: # %bb.0: 2194 ; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00] 2195 ; GENERIC-NEXT: retq # sched: [1:1.00] 2196 ; 2197 ; SKX-LABEL: uito4f64: 2198 ; SKX: # %bb.0: 2199 ; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] 2200 ; SKX-NEXT: retq # sched: [7:1.00] 2201 %b = uitofp <4 x i32> %a to <4 x double> 2202 ret <4 x double> %b 2203 } 2204 2205 define <16 x float> @uito16f32(<16 x i32> %a) nounwind { 2206 ; GENERIC-LABEL: uito16f32: 2207 ; GENERIC: # %bb.0: 2208 ; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00] 2209 ; GENERIC-NEXT: retq # sched: [1:1.00] 2210 ; 2211 ; SKX-LABEL: uito16f32: 2212 ; SKX: # %bb.0: 2213 ; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] 2214 ; SKX-NEXT: retq # sched: [7:1.00] 2215 %b = uitofp <16 x i32> %a to <16 x float> 2216 ret <16 x float> %b 2217 } 2218 2219 define <8 x double> @uito8f64(<8 x i32> %a) { 2220 ; GENERIC-LABEL: uito8f64: 2221 ; GENERIC: # %bb.0: 2222 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00] 2223 ; GENERIC-NEXT: retq # sched: [1:1.00] 2224 ; 2225 ; SKX-LABEL: uito8f64: 2226 ; SKX: # %bb.0: 2227 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] 2228 ; SKX-NEXT: retq # sched: [7:1.00] 2229 %b = uitofp <8 x i32> %a to <8 x double> 2230 ret <8 x double> %b 2231 } 2232 2233 define <8 x float> @uito8f32(<8 x i32> %a) nounwind { 2234 ; GENERIC-LABEL: uito8f32: 2235 ; GENERIC: # %bb.0: 2236 ; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00] 2237 ; GENERIC-NEXT: retq # sched: [1:1.00] 2238 ; 2239 ; SKX-LABEL: uito8f32: 2240 ; SKX: # %bb.0: 2241 ; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] 2242 ; SKX-NEXT: retq # sched: [7:1.00] 2243 %b = uitofp <8 x i32> %a to <8 x float> 2244 
ret <8 x float> %b 2245 } 2246 2247 define <4 x float> @uito4f32(<4 x i32> %a) nounwind { 2248 ; GENERIC-LABEL: uito4f32: 2249 ; GENERIC: # %bb.0: 2250 ; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00] 2251 ; GENERIC-NEXT: retq # sched: [1:1.00] 2252 ; 2253 ; SKX-LABEL: uito4f32: 2254 ; SKX: # %bb.0: 2255 ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] 2256 ; SKX-NEXT: retq # sched: [7:1.00] 2257 %b = uitofp <4 x i32> %a to <4 x float> 2258 ret <4 x float> %b 2259 } 2260 2261 define i32 @fptosi(float %a) nounwind { 2262 ; GENERIC-LABEL: fptosi: 2263 ; GENERIC: # %bb.0: 2264 ; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00] 2265 ; GENERIC-NEXT: retq # sched: [1:1.00] 2266 ; 2267 ; SKX-LABEL: fptosi: 2268 ; SKX: # %bb.0: 2269 ; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] 2270 ; SKX-NEXT: retq # sched: [7:1.00] 2271 %b = fptosi float %a to i32 2272 ret i32 %b 2273 } 2274 2275 define i32 @fptoui(float %a) nounwind { 2276 ; GENERIC-LABEL: fptoui: 2277 ; GENERIC: # %bb.0: 2278 ; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [5:1.00] 2279 ; GENERIC-NEXT: retq # sched: [1:1.00] 2280 ; 2281 ; SKX-LABEL: fptoui: 2282 ; SKX: # %bb.0: 2283 ; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00] 2284 ; SKX-NEXT: retq # sched: [7:1.00] 2285 %b = fptoui float %a to i32 2286 ret i32 %b 2287 } 2288 2289 define float @uitof32(i32 %a) nounwind { 2290 ; GENERIC-LABEL: uitof32: 2291 ; GENERIC: # %bb.0: 2292 ; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] 2293 ; GENERIC-NEXT: retq # sched: [1:1.00] 2294 ; 2295 ; SKX-LABEL: uitof32: 2296 ; SKX: # %bb.0: 2297 ; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2298 ; SKX-NEXT: retq # sched: [7:1.00] 2299 %b = uitofp i32 %a to float 2300 ret float %b 2301 } 2302 2303 define double @uitof64(i32 %a) nounwind { 2304 ; GENERIC-LABEL: uitof64: 2305 ; GENERIC: # %bb.0: 2306 ; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] 2307 ; GENERIC-NEXT: retq # sched: [1:1.00] 2308 ; 2309 
; SKX-LABEL: uitof64: 2310 ; SKX: # %bb.0: 2311 ; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2312 ; SKX-NEXT: retq # sched: [7:1.00] 2313 %b = uitofp i32 %a to double 2314 ret double %b 2315 } 2316 2317 define <16 x float> @sbto16f32(<16 x i32> %a) { 2318 ; GENERIC-LABEL: sbto16f32: 2319 ; GENERIC: # %bb.0: 2320 ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 2321 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 2322 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2323 ; GENERIC-NEXT: retq # sched: [1:1.00] 2324 ; 2325 ; SKX-LABEL: sbto16f32: 2326 ; SKX: # %bb.0: 2327 ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] 2328 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 2329 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2330 ; SKX-NEXT: retq # sched: [7:1.00] 2331 %mask = icmp slt <16 x i32> %a, zeroinitializer 2332 %1 = sitofp <16 x i1> %mask to <16 x float> 2333 ret <16 x float> %1 2334 } 2335 2336 define <16 x float> @scto16f32(<16 x i8> %a) { 2337 ; GENERIC-LABEL: scto16f32: 2338 ; GENERIC: # %bb.0: 2339 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] 2340 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2341 ; GENERIC-NEXT: retq # sched: [1:1.00] 2342 ; 2343 ; SKX-LABEL: scto16f32: 2344 ; SKX: # %bb.0: 2345 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] 2346 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2347 ; SKX-NEXT: retq # sched: [7:1.00] 2348 %1 = sitofp <16 x i8> %a to <16 x float> 2349 ret <16 x float> %1 2350 } 2351 2352 define <16 x float> @ssto16f32(<16 x i16> %a) { 2353 ; GENERIC-LABEL: ssto16f32: 2354 ; GENERIC: # %bb.0: 2355 ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] 2356 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2357 ; GENERIC-NEXT: retq # sched: [1:1.00] 2358 ; 2359 ; SKX-LABEL: ssto16f32: 2360 ; SKX: # %bb.0: 2361 ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] 2362 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2363 ; SKX-NEXT: 
retq # sched: [7:1.00] 2364 %1 = sitofp <16 x i16> %a to <16 x float> 2365 ret <16 x float> %1 2366 } 2367 2368 define <8 x double> @ssto16f64(<8 x i16> %a) { 2369 ; GENERIC-LABEL: ssto16f64: 2370 ; GENERIC: # %bb.0: 2371 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] 2372 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2373 ; GENERIC-NEXT: retq # sched: [1:1.00] 2374 ; 2375 ; SKX-LABEL: ssto16f64: 2376 ; SKX: # %bb.0: 2377 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 2378 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2379 ; SKX-NEXT: retq # sched: [7:1.00] 2380 %1 = sitofp <8 x i16> %a to <8 x double> 2381 ret <8 x double> %1 2382 } 2383 2384 define <8 x double> @scto8f64(<8 x i8> %a) { 2385 ; GENERIC-LABEL: scto8f64: 2386 ; GENERIC: # %bb.0: 2387 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 2388 ; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] 2389 ; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] 2390 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2391 ; GENERIC-NEXT: retq # sched: [1:1.00] 2392 ; 2393 ; SKX-LABEL: scto8f64: 2394 ; SKX: # %bb.0: 2395 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 2396 ; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50] 2397 ; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50] 2398 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2399 ; SKX-NEXT: retq # sched: [7:1.00] 2400 %1 = sitofp <8 x i8> %a to <8 x double> 2401 ret <8 x double> %1 2402 } 2403 2404 define <16 x double> @scto16f64(<16 x i8> %a) { 2405 ; GENERIC-LABEL: scto16f64: 2406 ; GENERIC: # %bb.0: 2407 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] 2408 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2409 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # 
sched: [1:1.00] 2410 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2411 ; GENERIC-NEXT: retq # sched: [1:1.00] 2412 ; 2413 ; SKX-LABEL: scto16f64: 2414 ; SKX: # %bb.0: 2415 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00] 2416 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2417 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2418 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2419 ; SKX-NEXT: retq # sched: [7:1.00] 2420 %b = sitofp <16 x i8> %a to <16 x double> 2421 ret <16 x double> %b 2422 } 2423 2424 define <16 x double> @sbto16f64(<16 x double> %a) { 2425 ; GENERIC-LABEL: sbto16f64: 2426 ; GENERIC: # %bb.0: 2427 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 2428 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] 2429 ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] 2430 ; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00] 2431 ; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33] 2432 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2433 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2434 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2435 ; GENERIC-NEXT: retq # sched: [1:1.00] 2436 ; 2437 ; SKX-LABEL: sbto16f64: 2438 ; SKX: # %bb.0: 2439 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 2440 ; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] 2441 ; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] 2442 ; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00] 2443 ; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25] 2444 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2445 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2446 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2447 ; SKX-NEXT: retq # sched: [7:1.00] 2448 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer 2449 %1 = sitofp <16 x i1> %cmpres to <16 x double> 2450 ret <16 x double> %1 2451 } 2452 2453 define <8 x double> @sbto8f64(<8 x double> %a) { 
2454 ; GENERIC-LABEL: sbto8f64: 2455 ; GENERIC: # %bb.0: 2456 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2457 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] 2458 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] 2459 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2460 ; GENERIC-NEXT: retq # sched: [1:1.00] 2461 ; 2462 ; SKX-LABEL: sbto8f64: 2463 ; SKX: # %bb.0: 2464 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2465 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] 2466 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] 2467 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2468 ; SKX-NEXT: retq # sched: [7:1.00] 2469 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer 2470 %1 = sitofp <8 x i1> %cmpres to <8 x double> 2471 ret <8 x double> %1 2472 } 2473 2474 define <8 x float> @sbto8f32(<8 x float> %a) { 2475 ; GENERIC-LABEL: sbto8f32: 2476 ; GENERIC: # %bb.0: 2477 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2478 ; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2479 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 2480 ; GENERIC-NEXT: retq # sched: [1:1.00] 2481 ; 2482 ; SKX-LABEL: sbto8f32: 2483 ; SKX: # %bb.0: 2484 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2485 ; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 2486 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] 2487 ; SKX-NEXT: retq # sched: [7:1.00] 2488 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer 2489 %1 = sitofp <8 x i1> %cmpres to <8 x float> 2490 ret <8 x float> %1 2491 } 2492 2493 define <4 x float> @sbto4f32(<4 x float> %a) { 2494 ; GENERIC-LABEL: sbto4f32: 2495 ; GENERIC: # %bb.0: 2496 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2497 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2498 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 2499 ; GENERIC-NEXT: retq # sched: [1:1.00] 2500 ; 2501 ; SKX-LABEL: sbto4f32: 2502 ; SKX: # 
%bb.0: 2503 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2504 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2505 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 2506 ; SKX-NEXT: retq # sched: [7:1.00] 2507 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer 2508 %1 = sitofp <4 x i1> %cmpres to <4 x float> 2509 ret <4 x float> %1 2510 } 2511 2512 define <4 x double> @sbto4f64(<4 x double> %a) { 2513 ; GENERIC-LABEL: sbto4f64: 2514 ; GENERIC: # %bb.0: 2515 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2516 ; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] 2517 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 2518 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] 2519 ; GENERIC-NEXT: retq # sched: [1:1.00] 2520 ; 2521 ; SKX-LABEL: sbto4f64: 2522 ; SKX: # %bb.0: 2523 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2524 ; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] 2525 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 2526 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] 2527 ; SKX-NEXT: retq # sched: [7:1.00] 2528 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer 2529 %1 = sitofp <4 x i1> %cmpres to <4 x double> 2530 ret <4 x double> %1 2531 } 2532 2533 define <2 x float> @sbto2f32(<2 x float> %a) { 2534 ; GENERIC-LABEL: sbto2f32: 2535 ; GENERIC: # %bb.0: 2536 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2537 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2538 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 2539 ; GENERIC-NEXT: retq # sched: [1:1.00] 2540 ; 2541 ; SKX-LABEL: sbto2f32: 2542 ; SKX: # %bb.0: 2543 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2544 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2545 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 2546 ; SKX-NEXT: retq # sched: [7:1.00] 2547 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer 2548 %1 = sitofp <2 x i1> %cmpres to <2 x float> 2549 ret 
<2 x float> %1
}

; sitofp of a <2 x i1> compare result (fcmp ogt %a, zero) to <2 x double>.
define <2 x double> @sbto2f64(<2 x double> %a) {
; GENERIC-LABEL: sbto2f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
  %1 = sitofp <2 x i1> %cmpres to <2 x double>
  ret <2 x double> %1
}

; uitofp <16 x i8> to <16 x float>.
define <16 x float> @ucto16f32(<16 x i8> %a) {
; GENERIC-LABEL: ucto16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ucto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <16 x i8> %a to <16 x float>
  ret <16 x float>%b
}

; uitofp <8 x i8> to <8 x double>.
define <8 x double> @ucto8f64(<8 x i8> %a) {
; GENERIC-LABEL: ucto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ucto8f64:
; SKX: # %bb.0:
; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %b
}

; sitofp <16 x i16> to <16 x float>.
define <16 x float> @swto16f32(<16 x i16> %a) {
; GENERIC-LABEL: swto16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: swto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}

; sitofp <8 x i16> to <8 x double>.
define <8 x double> @swto8f64(<8 x i16> %a) {
; GENERIC-LABEL: swto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: swto8f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %b
}

; sitofp <16 x i16> to <16 x double> (result wider than one zmm register).
define <16 x double> @swto16f64(<16 x i16> %a) {
; GENERIC-LABEL: swto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: swto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = sitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %b
}

; uitofp <16 x i8> to <16 x double> (result wider than one zmm register).
define <16 x double> @ucto16f64(<16 x i8> %a) {
; GENERIC-LABEL: ucto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ucto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %b
}

; uitofp <16 x i16> to <16 x float>.
define <16 x float> @uwto16f32(<16 x i16> %a) {
; GENERIC-LABEL: uwto16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: uwto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}

; uitofp <8 x i16> to <8 x double>.
define <8 x double> @uwto8f64(<8 x i16> %a) {
; GENERIC-LABEL: uwto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: uwto8f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %b
}

; uitofp <16 x i16> to <16 x double> (result wider than one zmm register).
define <16 x double> @uwto16f64(<16 x i16> %a) {
; GENERIC-LABEL: uwto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: uwto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %b
}

; sitofp <16 x i32> to <16 x float>.
define <16 x float> @sito16f32(<16 x i32> %a) {
; GENERIC-LABEL: sito16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sito16f32:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; sitofp <16 x i32> to <16 x double> (result wider than one zmm register).
define <16 x double> @sito16f64(<16 x i32> %a) {
; GENERIC-LABEL: sito16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00]
; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sito16f64:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00]
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = sitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}

; uitofp <16 x i16> to <16 x float>; the IR body is the same as @uwto16f32.
define <16 x float> @usto16f32(<16 x i16> %a) {
; GENERIC-LABEL: usto16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: usto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}

; uitofp of a <16 x i1> mask (icmp slt %a, zero) to <16 x float>.
define <16 x float> @ubto16f32(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x float>
  ret <16 x float> %1
}

; uitofp of a <16 x i1> mask (icmp slt %a, zero) to <16 x double>.
define <16 x double> @ubto16f64(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x double>
  ret <16 x double> %1
}

; uitofp of an <8 x i1> mask (icmp slt %a, zero) to <8 x float>.
define <8 x float> @ubto8f32(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f32:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x float>
  ret <8 x float> %1
}

; uitofp of an <8 x i1> mask (icmp slt %a, zero) to <8 x double>.
define <8 x double> @ubto8f64(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f64:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x double>
  ret <8 x double> %1
}

; uitofp of a <4 x i1> mask (icmp slt %a, zero) to <4 x float>.
define <4 x float> @ubto4f32(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f32:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x float>
  ret <4 x float> %1
}

; uitofp of a <4 x i1> mask (icmp slt %a, zero) to <4 x double>.
define <4 x double> @ubto4f64(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f64:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x double>
  ret <4 x double> %1
}

; uitofp of a <2 x i1> mask (icmp ne %a, zero) to <2 x float>.
define <2 x float> @ubto2f32(<2 x i32> %a) {
; GENERIC-LABEL: ubto2f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto2f32:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x float>
  ret <2 x float> %1
}

; uitofp of a <2 x i1> mask (icmp ne %a, zero) to <2 x double>.
define <2 x double> @ubto2f64(<2 x i32> %a) {
; GENERIC-LABEL: ubto2f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x double>
  ret <2 x double> %1
}

; Zero-masked zext of a loaded <8 x i8> to <8 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_8x8mem_to_8x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}

; Zero-masked sext of a loaded <8 x i8> to <8 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_8x8mem_to_8x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}


; Zero-masked zext of a loaded <16 x i8> to <16 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_16x8mem_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8mem_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Zero-masked sext of a loaded <16 x i8> to <16 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_16x8mem_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8mem_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Unmasked register zext <16 x i8> to <16 x i16>.
define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; GENERIC-LABEL: zext_16x8_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = zext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

; Zero-masked register zext <16 x i8> to <16 x i16> (select against zero).
define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_16x8_to_16x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8_to_16x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Unmasked register sext <16 x i8> to <16 x i16>.
define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; GENERIC-LABEL: sext_16x8_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8_to_16x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

; Zero-masked register sext <16 x i8> to <16 x i16> (select against zero).
define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: sext_16x8_to_16x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8_to_16x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Zero-masked zext of a loaded <32 x i8> to <32 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_32x8mem_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32x8mem_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Zero-masked sext of a loaded <32 x i8> to <32 x i16>.
; Declared readonly rather than readnone because the body loads through %i.
define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_32x8mem_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_32x8mem_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Unmasked register zext <32 x i8> to <32 x i16>.
define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; GENERIC-LABEL: zext_32x8_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32x8_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = zext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

; Zero-masked register zext <32 x i8> to <32 x i16> (select against zero).
define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_32x8_to_32x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32x8_to_32x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Unmasked register sext <32 x i8> to <32 x i16>.
define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; GENERIC-LABEL: sext_32x8_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_32x8_to_32x16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

; Zero-masked register sext <32 x i8> to <32 x i16> (select against zero).
define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: sext_32x8_to_32x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_32x8_to_32x16_mask:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Zero-masked zext of a loaded <4 x i8> to <4 x i32>.
; Declared readonly rather than readnone because the body loads through %i.
define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_4x8mem_to_4x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x8mem_to_4x32:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = zext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Zero-masked sext of a loaded <4 x i8> to <4 x i32>.
; Declared readonly rather than readnone because the body loads through %i.
define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_4x8mem_to_4x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x32:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Zero-masked zext of a loaded <8 x i8> to <8 x i32>.
; Declared readonly rather than readnone because the body loads through %i.
define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_8x8mem_to_8x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: sext_8x8mem_to_8x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x32:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i32>
3284 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3285 ret <8 x i32> %ret 3286 } 3287 3288 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 3289 ; GENERIC-LABEL: zext_16x8mem_to_16x32: 3290 ; GENERIC: # %bb.0: 3291 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3292 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3293 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00] 3294 ; GENERIC-NEXT: retq # sched: [1:1.00] 3295 ; 3296 ; SKX-LABEL: zext_16x8mem_to_16x32: 3297 ; SKX: # %bb.0: 3298 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3299 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3300 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00] 3301 ; SKX-NEXT: retq # sched: [7:1.00] 3302 %a = load <16 x i8>,<16 x i8> *%i,align 1 3303 %x = zext <16 x i8> %a to <16 x i32> 3304 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3305 ret <16 x i32> %ret 3306 } 3307 3308 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 3309 ; GENERIC-LABEL: sext_16x8mem_to_16x32: 3310 ; GENERIC: # %bb.0: 3311 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3312 ; GENERIC-NEXT: vpmovb2m 
%xmm0, %k1 # sched: [1:0.33] 3313 ; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3314 ; GENERIC-NEXT: retq # sched: [1:1.00] 3315 ; 3316 ; SKX-LABEL: sext_16x8mem_to_16x32: 3317 ; SKX: # %bb.0: 3318 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3319 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3320 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3321 ; SKX-NEXT: retq # sched: [7:1.00] 3322 %a = load <16 x i8>,<16 x i8> *%i,align 1 3323 %x = sext <16 x i8> %a to <16 x i32> 3324 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3325 ret <16 x i32> %ret 3326 } 3327 3328 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 3329 ; GENERIC-LABEL: zext_16x8_to_16x32_mask: 3330 ; GENERIC: # %bb.0: 3331 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3332 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3333 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 3334 ; GENERIC-NEXT: retq # sched: [1:1.00] 3335 ; 3336 ; SKX-LABEL: zext_16x8_to_16x32_mask: 3337 ; SKX: # %bb.0: 3338 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3339 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3340 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 3341 ; SKX-NEXT: retq # sched: [7:1.00] 3342 %x = zext <16 x i8> %a to <16 x i32> 3343 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3344 ret <16 x i32> %ret 3345 } 3346 3347 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 3348 ; GENERIC-LABEL: sext_16x8_to_16x32_mask: 3349 ; GENERIC: # %bb.0: 3350 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3351 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3352 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 3353 ; GENERIC-NEXT: retq # sched: [1:1.00] 3354 ; 3355 ; SKX-LABEL: sext_16x8_to_16x32_mask: 3356 ; SKX: # %bb.0: 3357 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3358 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3359 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 3360 ; SKX-NEXT: retq # sched: [7:1.00] 3361 %x = sext <16 x i8> %a to <16 x i32> 3362 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3363 ret <16 x i32> %ret 3364 } 3365 3366 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 3367 ; GENERIC-LABEL: zext_16x8_to_16x32: 3368 ; GENERIC: # %bb.0: 3369 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 3370 ; GENERIC-NEXT: retq # sched: [1:1.00] 3371 ; 3372 ; SKX-LABEL: zext_16x8_to_16x32: 3373 ; SKX: # %bb.0: 3374 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 3375 ; SKX-NEXT: retq # sched: [7:1.00] 3376 %x = zext <16 x i8> %i to <16 x i32> 3377 ret <16 x i32> %x 3378 } 3379 3380 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 3381 ; GENERIC-LABEL: sext_16x8_to_16x32: 3382 ; GENERIC: # %bb.0: 3383 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] 3384 ; GENERIC-NEXT: retq # sched: [1:1.00] 3385 ; 3386 ; SKX-LABEL: sext_16x8_to_16x32: 3387 ; SKX: # %bb.0: 3388 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] 3389 ; SKX-NEXT: retq # sched: [7:1.00] 3390 %x = sext <16 x i8> %i to <16 x i32> 3391 ret <16 x i32> %x 3392 } 3393 3394 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 3395 ; GENERIC-LABEL: zext_2x8mem_to_2x64: 3396 ; GENERIC: # %bb.0: 3397 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3398 ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3399 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3400 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 3401 ; 3402 ; SKX-LABEL: zext_2x8mem_to_2x64: 3403 ; SKX: # %bb.0: 3404 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3405 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3406 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] 3407 ; SKX-NEXT: retq # sched: [7:1.00] 3408 %a = load <2 x i8>,<2 x i8> *%i,align 1 3409 %x = zext <2 x i8> %a to <2 x i64> 3410 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3411 ret <2 x i64> %ret 3412 } 3413 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 3414 ; GENERIC-LABEL: sext_2x8mem_to_2x64mask: 3415 ; GENERIC: # %bb.0: 3416 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3417 ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3418 ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3419 ; GENERIC-NEXT: retq # sched: [1:1.00] 3420 ; 3421 ; SKX-LABEL: sext_2x8mem_to_2x64mask: 3422 ; SKX: # %bb.0: 3423 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3424 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3425 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3426 ; SKX-NEXT: retq # sched: [7:1.00] 3427 %a = load <2 x i8>,<2 x i8> *%i,align 1 3428 %x = sext <2 x i8> %a to <2 x i64> 3429 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3430 ret <2 x i64> %ret 3431 } 3432 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { 3433 ; GENERIC-LABEL: sext_2x8mem_to_2x64: 3434 ; GENERIC: # %bb.0: 3435 ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50] 3436 ; GENERIC-NEXT: retq # sched: [1:1.00] 3437 ; 3438 ; SKX-LABEL: sext_2x8mem_to_2x64: 3439 ; SKX: # %bb.0: 3440 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00] 3441 ; SKX-NEXT: retq # sched: [7:1.00] 3442 %a = load <2 x i8>,<2 x i8> *%i,align 1 3443 %x = sext <2 x i8> %a to <2 x 
i64> 3444 ret <2 x i64> %x 3445 } 3446 3447 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3448 ; GENERIC-LABEL: zext_4x8mem_to_4x64: 3449 ; GENERIC: # %bb.0: 3450 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3451 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3452 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3453 ; GENERIC-NEXT: retq # sched: [1:1.00] 3454 ; 3455 ; SKX-LABEL: zext_4x8mem_to_4x64: 3456 ; SKX: # %bb.0: 3457 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3458 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3459 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 3460 ; SKX-NEXT: retq # sched: [7:1.00] 3461 %a = load <4 x i8>,<4 x i8> *%i,align 1 3462 %x = zext <4 x i8> %a to <4 x i64> 3463 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3464 ret <4 x i64> %ret 3465 } 3466 3467 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3468 ; GENERIC-LABEL: sext_4x8mem_to_4x64mask: 3469 ; GENERIC: # %bb.0: 3470 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3471 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3472 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3473 ; GENERIC-NEXT: retq # sched: [1:1.00] 3474 ; 3475 ; SKX-LABEL: sext_4x8mem_to_4x64mask: 3476 ; SKX: # %bb.0: 3477 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3478 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3479 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3480 ; SKX-NEXT: retq # sched: [7:1.00] 3481 %a = load <4 x i8>,<4 x 
i8> *%i,align 1 3482 %x = sext <4 x i8> %a to <4 x i64> 3483 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3484 ret <4 x i64> %ret 3485 } 3486 3487 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { 3488 ; GENERIC-LABEL: sext_4x8mem_to_4x64: 3489 ; GENERIC: # %bb.0: 3490 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] 3491 ; GENERIC-NEXT: retq # sched: [1:1.00] 3492 ; 3493 ; SKX-LABEL: sext_4x8mem_to_4x64: 3494 ; SKX: # %bb.0: 3495 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] 3496 ; SKX-NEXT: retq # sched: [7:1.00] 3497 %a = load <4 x i8>,<4 x i8> *%i,align 1 3498 %x = sext <4 x i8> %a to <4 x i64> 3499 ret <4 x i64> %x 3500 } 3501 3502 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3503 ; GENERIC-LABEL: zext_8x8mem_to_8x64: 3504 ; GENERIC: # %bb.0: 3505 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3506 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3507 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3508 ; GENERIC-NEXT: retq # sched: [1:1.00] 3509 ; 3510 ; SKX-LABEL: zext_8x8mem_to_8x64: 3511 ; SKX: # %bb.0: 3512 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3513 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3514 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 3515 ; SKX-NEXT: retq # sched: [7:1.00] 3516 %a = load <8 x i8>,<8 x i8> *%i,align 1 3517 %x = zext <8 x i8> %a to <8 x i64> 3518 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3519 ret <8 x i64> %ret 3520 } 3521 3522 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3523 ; GENERIC-LABEL: sext_8x8mem_to_8x64mask: 3524 ; GENERIC: # %bb.0: 3525 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3526 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3527 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3528 ; GENERIC-NEXT: retq # sched: [1:1.00] 3529 ; 3530 ; SKX-LABEL: sext_8x8mem_to_8x64mask: 3531 ; SKX: # %bb.0: 3532 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3533 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3534 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3535 ; SKX-NEXT: retq # sched: [7:1.00] 3536 %a = load <8 x i8>,<8 x i8> *%i,align 1 3537 %x = sext <8 x i8> %a to <8 x i64> 3538 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3539 ret <8 x i64> %ret 3540 } 3541 3542 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { 3543 ; GENERIC-LABEL: sext_8x8mem_to_8x64: 3544 ; GENERIC: # %bb.0: 3545 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] 3546 ; GENERIC-NEXT: retq # sched: [1:1.00] 3547 ; 3548 ; SKX-LABEL: sext_8x8mem_to_8x64: 3549 ; SKX: # %bb.0: 3550 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] 3551 ; SKX-NEXT: retq # sched: [7:1.00] 3552 %a = load <8 x i8>,<8 x i8> *%i,align 1 3553 %x = sext <8 x i8> %a to <8 x i64> 3554 
ret <8 x i64> %x 3555 } 3556 3557 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3558 ; GENERIC-LABEL: zext_4x16mem_to_4x32: 3559 ; GENERIC: # %bb.0: 3560 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3561 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3562 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 3563 ; GENERIC-NEXT: retq # sched: [1:1.00] 3564 ; 3565 ; SKX-LABEL: zext_4x16mem_to_4x32: 3566 ; SKX: # %bb.0: 3567 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3568 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3569 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] 3570 ; SKX-NEXT: retq # sched: [7:1.00] 3571 %a = load <4 x i16>,<4 x i16> *%i,align 1 3572 %x = zext <4 x i16> %a to <4 x i32> 3573 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3574 ret <4 x i32> %ret 3575 } 3576 3577 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3578 ; GENERIC-LABEL: sext_4x16mem_to_4x32mask: 3579 ; GENERIC: # %bb.0: 3580 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3581 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3582 ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3583 ; GENERIC-NEXT: retq # sched: [1:1.00] 3584 ; 3585 ; SKX-LABEL: sext_4x16mem_to_4x32mask: 3586 ; SKX: # %bb.0: 3587 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3588 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3589 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3590 ; SKX-NEXT: retq # sched: [7:1.00] 3591 %a = load <4 x i16>,<4 x i16> *%i,align 1 3592 %x = sext <4 x i16> %a to <4 x i32> 3593 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3594 ret <4 x i32> %ret 3595 } 3596 3597 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind 
readnone { 3598 ; GENERIC-LABEL: sext_4x16mem_to_4x32: 3599 ; GENERIC: # %bb.0: 3600 ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50] 3601 ; GENERIC-NEXT: retq # sched: [1:1.00] 3602 ; 3603 ; SKX-LABEL: sext_4x16mem_to_4x32: 3604 ; SKX: # %bb.0: 3605 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3606 ; SKX-NEXT: retq # sched: [7:1.00] 3607 %a = load <4 x i16>,<4 x i16> *%i,align 1 3608 %x = sext <4 x i16> %a to <4 x i32> 3609 ret <4 x i32> %x 3610 } 3611 3612 3613 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3614 ; GENERIC-LABEL: zext_8x16mem_to_8x32: 3615 ; GENERIC: # %bb.0: 3616 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3617 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3618 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 3619 ; GENERIC-NEXT: retq # sched: [1:1.00] 3620 ; 3621 ; SKX-LABEL: zext_8x16mem_to_8x32: 3622 ; SKX: # %bb.0: 3623 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3624 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3625 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] 3626 ; SKX-NEXT: retq # sched: [7:1.00] 3627 %a = load <8 x i16>,<8 x i16> *%i,align 1 3628 %x = zext <8 x i16> %a to <8 x i32> 3629 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3630 ret <8 x i32> %ret 3631 } 3632 3633 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3634 ; GENERIC-LABEL: sext_8x16mem_to_8x32mask: 3635 ; GENERIC: # %bb.0: 3636 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3637 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3638 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3639 ; GENERIC-NEXT: retq # sched: [1:1.00] 3640 ; 3641 ; 
SKX-LABEL: sext_8x16mem_to_8x32mask: 3642 ; SKX: # %bb.0: 3643 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3644 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3645 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3646 ; SKX-NEXT: retq # sched: [7:1.00] 3647 %a = load <8 x i16>,<8 x i16> *%i,align 1 3648 %x = sext <8 x i16> %a to <8 x i32> 3649 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3650 ret <8 x i32> %ret 3651 } 3652 3653 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { 3654 ; GENERIC-LABEL: sext_8x16mem_to_8x32: 3655 ; GENERIC: # %bb.0: 3656 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00] 3657 ; GENERIC-NEXT: retq # sched: [1:1.00] 3658 ; 3659 ; SKX-LABEL: sext_8x16mem_to_8x32: 3660 ; SKX: # %bb.0: 3661 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00] 3662 ; SKX-NEXT: retq # sched: [7:1.00] 3663 %a = load <8 x i16>,<8 x i16> *%i,align 1 3664 %x = sext <8 x i16> %a to <8 x i32> 3665 ret <8 x i32> %x 3666 } 3667 3668 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 3669 ; GENERIC-LABEL: zext_8x16_to_8x32mask: 3670 ; GENERIC: # %bb.0: 3671 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 3672 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 3673 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 3674 ; GENERIC-NEXT: retq # sched: [1:1.00] 3675 ; 3676 ; SKX-LABEL: zext_8x16_to_8x32mask: 3677 ; SKX: # %bb.0: 3678 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 3679 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 3680 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 3681 ; SKX-NEXT: retq # sched: [7:1.00] 3682 %x = zext <8 x i16> %a to <8 x i32> 3683 %ret = select <8 x 
i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3684 ret <8 x i32> %ret 3685 } 3686 3687 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { 3688 ; GENERIC-LABEL: zext_8x16_to_8x32: 3689 ; GENERIC: # %bb.0: 3690 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 3691 ; GENERIC-NEXT: retq # sched: [1:1.00] 3692 ; 3693 ; SKX-LABEL: zext_8x16_to_8x32: 3694 ; SKX: # %bb.0: 3695 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 3696 ; SKX-NEXT: retq # sched: [7:1.00] 3697 %x = zext <8 x i16> %a to <8 x i32> 3698 ret <8 x i32> %x 3699 } 3700 3701 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 3702 ; GENERIC-LABEL: zext_16x16mem_to_16x32: 3703 ; GENERIC: # %bb.0: 3704 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3705 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3706 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] 3707 ; GENERIC-NEXT: retq # sched: [1:1.00] 3708 ; 3709 ; SKX-LABEL: zext_16x16mem_to_16x32: 3710 ; SKX: # %bb.0: 3711 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3712 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3713 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 3714 ; SKX-NEXT: retq # sched: [7:1.00] 3715 %a = load <16 x i16>,<16 x i16> *%i,align 1 3716 %x = zext <16 x i16> %a to <16 x i32> 3717 %ret = select <16 
x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3718 ret <16 x i32> %ret 3719 } 3720 3721 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 3722 ; GENERIC-LABEL: sext_16x16mem_to_16x32mask: 3723 ; GENERIC: # %bb.0: 3724 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3725 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3726 ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3727 ; GENERIC-NEXT: retq # sched: [1:1.00] 3728 ; 3729 ; SKX-LABEL: sext_16x16mem_to_16x32mask: 3730 ; SKX: # %bb.0: 3731 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3732 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3733 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3734 ; SKX-NEXT: retq # sched: [7:1.00] 3735 %a = load <16 x i16>,<16 x i16> *%i,align 1 3736 %x = sext <16 x i16> %a to <16 x i32> 3737 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3738 ret <16 x i32> %ret 3739 } 3740 3741 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { 3742 ; GENERIC-LABEL: sext_16x16mem_to_16x32: 3743 ; GENERIC: # %bb.0: 3744 ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00] 3745 ; GENERIC-NEXT: retq # sched: [1:1.00] 3746 ; 3747 ; SKX-LABEL: sext_16x16mem_to_16x32: 3748 ; SKX: # %bb.0: 3749 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00] 3750 ; SKX-NEXT: retq # sched: [7:1.00] 3751 %a = load <16 x i16>,<16 x i16> *%i,align 1 3752 %x = sext <16 x i16> %a to <16 x i32> 3753 ret <16 x i32> %x 3754 } 3755 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { 3756 ; GENERIC-LABEL: zext_16x16_to_16x32mask: 3757 ; GENERIC: # %bb.0: 3758 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3759 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3760 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 3761 ; GENERIC-NEXT: retq # sched: [1:1.00] 3762 ; 3763 ; SKX-LABEL: zext_16x16_to_16x32mask: 3764 ; SKX: # %bb.0: 3765 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3766 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3767 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 3768 ; SKX-NEXT: retq # sched: [7:1.00] 3769 %x = zext <16 x i16> %a to <16 x i32> 3770 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3771 ret <16 x i32> %ret 3772 } 3773 3774 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { 3775 ; GENERIC-LABEL: zext_16x16_to_16x32: 3776 ; GENERIC: # %bb.0: 3777 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 3778 ; GENERIC-NEXT: retq # sched: [1:1.00] 3779 ; 3780 ; SKX-LABEL: zext_16x16_to_16x32: 3781 ; SKX: # %bb.0: 3782 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 3783 ; SKX-NEXT: retq # sched: [7:1.00] 3784 %x = zext <16 x i16> %a to <16 x i32> 3785 ret <16 x i32> %x 3786 } 3787 3788 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 3789 ; GENERIC-LABEL: 
zext_2x16mem_to_2x64: 3790 ; GENERIC: # %bb.0: 3791 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3792 ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3793 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00] 3794 ; GENERIC-NEXT: retq # sched: [1:1.00] 3795 ; 3796 ; SKX-LABEL: zext_2x16mem_to_2x64: 3797 ; SKX: # %bb.0: 3798 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3799 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3800 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] 3801 ; SKX-NEXT: retq # sched: [7:1.00] 3802 %a = load <2 x i16>,<2 x i16> *%i,align 1 3803 %x = zext <2 x i16> %a to <2 x i64> 3804 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3805 ret <2 x i64> %ret 3806 } 3807 3808 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 3809 ; GENERIC-LABEL: sext_2x16mem_to_2x64mask: 3810 ; GENERIC: # %bb.0: 3811 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3812 ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3813 ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3814 ; GENERIC-NEXT: retq # sched: [1:1.00] 3815 ; 3816 ; SKX-LABEL: sext_2x16mem_to_2x64mask: 3817 ; SKX: # %bb.0: 3818 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3819 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3820 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3821 ; SKX-NEXT: retq # sched: [7:1.00] 3822 %a = load <2 x i16>,<2 x i16> *%i,align 1 3823 %x = sext <2 x i16> %a to <2 x i64> 3824 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3825 ret <2 x i64> %ret 3826 } 3827 3828 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { 3829 ; GENERIC-LABEL: sext_2x16mem_to_2x64: 3830 ; GENERIC: # %bb.0: 3831 ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50] 3832 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 3833 ; 3834 ; SKX-LABEL: sext_2x16mem_to_2x64: 3835 ; SKX: # %bb.0: 3836 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3837 ; SKX-NEXT: retq # sched: [7:1.00] 3838 %a = load <2 x i16>,<2 x i16> *%i,align 1 3839 %x = sext <2 x i16> %a to <2 x i64> 3840 ret <2 x i64> %x 3841 } 3842 3843 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3844 ; GENERIC-LABEL: zext_4x16mem_to_4x64: 3845 ; GENERIC: # %bb.0: 3846 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3847 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3848 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] 3849 ; GENERIC-NEXT: retq # sched: [1:1.00] 3850 ; 3851 ; SKX-LABEL: zext_4x16mem_to_4x64: 3852 ; SKX: # %bb.0: 3853 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3854 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3855 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 3856 ; SKX-NEXT: retq # sched: [7:1.00] 3857 %a = load <4 x i16>,<4 x i16> *%i,align 1 3858 %x = zext <4 x i16> %a to <4 x i64> 3859 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3860 ret <4 x i64> %ret 3861 } 3862 3863 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3864 ; GENERIC-LABEL: sext_4x16mem_to_4x64mask: 3865 ; GENERIC: # %bb.0: 3866 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3867 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3868 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3869 ; GENERIC-NEXT: retq # sched: [1:1.00] 3870 ; 3871 ; SKX-LABEL: sext_4x16mem_to_4x64mask: 3872 ; SKX: # %bb.0: 3873 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3874 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 
3875 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3876 ; SKX-NEXT: retq # sched: [7:1.00] 3877 %a = load <4 x i16>,<4 x i16> *%i,align 1 3878 %x = sext <4 x i16> %a to <4 x i64> 3879 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3880 ret <4 x i64> %ret 3881 } 3882 3883 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { 3884 ; GENERIC-LABEL: sext_4x16mem_to_4x64: 3885 ; GENERIC: # %bb.0: 3886 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] 3887 ; GENERIC-NEXT: retq # sched: [1:1.00] 3888 ; 3889 ; SKX-LABEL: sext_4x16mem_to_4x64: 3890 ; SKX: # %bb.0: 3891 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] 3892 ; SKX-NEXT: retq # sched: [7:1.00] 3893 %a = load <4 x i16>,<4 x i16> *%i,align 1 3894 %x = sext <4 x i16> %a to <4 x i64> 3895 ret <4 x i64> %x 3896 } 3897 3898 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3899 ; GENERIC-LABEL: zext_8x16mem_to_8x64: 3900 ; GENERIC: # %bb.0: 3901 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3902 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3903 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] 3904 ; GENERIC-NEXT: retq # sched: [1:1.00] 3905 ; 3906 ; SKX-LABEL: zext_8x16mem_to_8x64: 3907 ; SKX: # %bb.0: 3908 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3909 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3910 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 3911 ; SKX-NEXT: retq # sched: [7:1.00] 3912 %a = load <8 x i16>,<8 x i16> *%i,align 1 3913 %x = zext <8 x i16> %a to <8 x i64> 3914 
%ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3915 ret <8 x i64> %ret 3916 } 3917 3918 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3919 ; GENERIC-LABEL: sext_8x16mem_to_8x64mask: 3920 ; GENERIC: # %bb.0: 3921 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3922 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3923 ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3924 ; GENERIC-NEXT: retq # sched: [1:1.00] 3925 ; 3926 ; SKX-LABEL: sext_8x16mem_to_8x64mask: 3927 ; SKX: # %bb.0: 3928 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3929 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3930 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3931 ; SKX-NEXT: retq # sched: [7:1.00] 3932 %a = load <8 x i16>,<8 x i16> *%i,align 1 3933 %x = sext <8 x i16> %a to <8 x i64> 3934 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3935 ret <8 x i64> %ret 3936 } 3937 3938 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { 3939 ; GENERIC-LABEL: sext_8x16mem_to_8x64: 3940 ; GENERIC: # %bb.0: 3941 ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00] 3942 ; GENERIC-NEXT: retq # sched: [1:1.00] 3943 ; 3944 ; SKX-LABEL: sext_8x16mem_to_8x64: 3945 ; SKX: # %bb.0: 3946 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00] 3947 ; SKX-NEXT: retq # sched: [7:1.00] 3948 %a = load <8 x i16>,<8 x i16> *%i,align 1 3949 %x = sext <8 x i16> %a to <8 x i64> 3950 ret <8 x i64> %x 3951 } 3952 3953 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 3954 ; GENERIC-LABEL: zext_8x16_to_8x64mask: 3955 ; GENERIC: # %bb.0: 3956 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 3957 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 3958 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 3959 ; GENERIC-NEXT: retq # sched: [1:1.00] 3960 ; 3961 ; SKX-LABEL: zext_8x16_to_8x64mask: 3962 ; SKX: # %bb.0: 3963 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 3964 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 3965 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 3966 ; SKX-NEXT: retq # sched: [7:1.00] 3967 %x = zext <8 x i16> %a to <8 x i64> 3968 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3969 ret <8 x i64> %ret 3970 } 3971 3972 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { 3973 ; GENERIC-LABEL: zext_8x16_to_8x64: 3974 ; GENERIC: # %bb.0: 3975 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 3976 ; GENERIC-NEXT: retq # sched: [1:1.00] 3977 ; 3978 ; SKX-LABEL: zext_8x16_to_8x64: 3979 ; SKX: # %bb.0: 3980 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 3981 ; SKX-NEXT: retq # sched: [7:1.00] 3982 %ret = zext <8 x i16> %a to <8 x i64> 3983 ret <8 x i64> %ret 3984 } 3985 3986 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 3987 ; GENERIC-LABEL: zext_2x32mem_to_2x64: 3988 ; GENERIC: # %bb.0: 3989 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3990 ; GENERIC-NEXT: vpmovq2m %xmm0, 
%k1 # sched: [1:0.33] 3991 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00] 3992 ; GENERIC-NEXT: retq # sched: [1:1.00] 3993 ; 3994 ; SKX-LABEL: zext_2x32mem_to_2x64: 3995 ; SKX: # %bb.0: 3996 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3997 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3998 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] 3999 ; SKX-NEXT: retq # sched: [7:1.00] 4000 %a = load <2 x i32>,<2 x i32> *%i,align 1 4001 %x = zext <2 x i32> %a to <2 x i64> 4002 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 4003 ret <2 x i64> %ret 4004 } 4005 4006 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 4007 ; GENERIC-LABEL: sext_2x32mem_to_2x64mask: 4008 ; GENERIC: # %bb.0: 4009 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 4010 ; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 4011 ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 4012 ; GENERIC-NEXT: retq # sched: [1:1.00] 4013 ; 4014 ; SKX-LABEL: sext_2x32mem_to_2x64mask: 4015 ; SKX: # %bb.0: 4016 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 4017 ; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 4018 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 4019 ; SKX-NEXT: retq # sched: [7:1.00] 4020 %a = load <2 x i32>,<2 x i32> *%i,align 1 4021 %x = sext <2 x i32> %a to <2 x i64> 4022 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 4023 ret <2 x i64> %ret 4024 } 4025 4026 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { 4027 ; GENERIC-LABEL: sext_2x32mem_to_2x64: 4028 ; GENERIC: # %bb.0: 4029 ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50] 4030 ; GENERIC-NEXT: retq # sched: [1:1.00] 4031 ; 4032 ; SKX-LABEL: sext_2x32mem_to_2x64: 4033 ; SKX: # %bb.0: 4034 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00] 4035 ; SKX-NEXT: retq # sched: 
[7:1.00] 4036 %a = load <2 x i32>,<2 x i32> *%i,align 1 4037 %x = sext <2 x i32> %a to <2 x i64> 4038 ret <2 x i64> %x 4039 } 4040 4041 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 4042 ; GENERIC-LABEL: zext_4x32mem_to_4x64: 4043 ; GENERIC: # %bb.0: 4044 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4045 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 4046 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 4047 ; GENERIC-NEXT: retq # sched: [1:1.00] 4048 ; 4049 ; SKX-LABEL: zext_4x32mem_to_4x64: 4050 ; SKX: # %bb.0: 4051 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4052 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 4053 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4054 ; SKX-NEXT: retq # sched: [7:1.00] 4055 %a = load <4 x i32>,<4 x i32> *%i,align 1 4056 %x = zext <4 x i32> %a to <4 x i64> 4057 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 4058 ret <4 x i64> %ret 4059 } 4060 4061 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 4062 ; GENERIC-LABEL: sext_4x32mem_to_4x64mask: 4063 ; GENERIC: # %bb.0: 4064 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4065 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 4066 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 4067 ; GENERIC-NEXT: retq # sched: [1:1.00] 4068 ; 4069 ; SKX-LABEL: sext_4x32mem_to_4x64mask: 4070 ; SKX: # %bb.0: 4071 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4072 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 4073 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 4074 ; SKX-NEXT: retq # sched: [7:1.00] 4075 %a = load <4 x i32>,<4 x i32> *%i,align 1 4076 %x = sext <4 x i32> %a to <4 x i64> 4077 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 
4078 ret <4 x i64> %ret 4079 } 4080 4081 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { 4082 ; GENERIC-LABEL: sext_4x32mem_to_4x64: 4083 ; GENERIC: # %bb.0: 4084 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00] 4085 ; GENERIC-NEXT: retq # sched: [1:1.00] 4086 ; 4087 ; SKX-LABEL: sext_4x32mem_to_4x64: 4088 ; SKX: # %bb.0: 4089 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00] 4090 ; SKX-NEXT: retq # sched: [7:1.00] 4091 %a = load <4 x i32>,<4 x i32> *%i,align 1 4092 %x = sext <4 x i32> %a to <4 x i64> 4093 ret <4 x i64> %x 4094 } 4095 4096 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { 4097 ; GENERIC-LABEL: sext_4x32_to_4x64: 4098 ; GENERIC: # %bb.0: 4099 ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] 4100 ; GENERIC-NEXT: retq # sched: [1:1.00] 4101 ; 4102 ; SKX-LABEL: sext_4x32_to_4x64: 4103 ; SKX: # %bb.0: 4104 ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4105 ; SKX-NEXT: retq # sched: [7:1.00] 4106 %x = sext <4 x i32> %a to <4 x i64> 4107 ret <4 x i64> %x 4108 } 4109 4110 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { 4111 ; GENERIC-LABEL: zext_4x32_to_4x64mask: 4112 ; GENERIC: # %bb.0: 4113 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] 4114 ; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] 4115 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4116 ; GENERIC-NEXT: retq # sched: [1:1.00] 4117 ; 4118 ; SKX-LABEL: zext_4x32_to_4x64mask: 4119 ; SKX: # %bb.0: 4120 ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] 4121 ; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] 4122 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4123 ; SKX-NEXT: retq # sched: [7:1.00] 4124 %x = zext <4 x i32> %a to <4 x i64> 4125 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 4126 ret <4 x 
i64> %ret 4127 } 4128 4129 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 4130 ; GENERIC-LABEL: zext_8x32mem_to_8x64: 4131 ; GENERIC: # %bb.0: 4132 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4133 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 4134 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 4135 ; GENERIC-NEXT: retq # sched: [1:1.00] 4136 ; 4137 ; SKX-LABEL: zext_8x32mem_to_8x64: 4138 ; SKX: # %bb.0: 4139 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4140 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 4141 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] 4142 ; SKX-NEXT: retq # sched: [7:1.00] 4143 %a = load <8 x i32>,<8 x i32> *%i,align 1 4144 %x = zext <8 x i32> %a to <8 x i64> 4145 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4146 ret <8 x i64> %ret 4147 } 4148 4149 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 4150 ; GENERIC-LABEL: sext_8x32mem_to_8x64mask: 4151 ; GENERIC: # %bb.0: 4152 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4153 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 4154 ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 4155 ; GENERIC-NEXT: retq # sched: [1:1.00] 4156 ; 4157 ; SKX-LABEL: sext_8x32mem_to_8x64mask: 4158 ; SKX: # %bb.0: 4159 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4160 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 4161 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 4162 ; SKX-NEXT: retq # sched: [7:1.00] 4163 %a = load <8 x i32>,<8 x i32> *%i,align 1 4164 %x = sext <8 x i32> %a to <8 x i64> 4165 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4166 ret <8 x 
i64> %ret 4167 } 4168 4169 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { 4170 ; GENERIC-LABEL: sext_8x32mem_to_8x64: 4171 ; GENERIC: # %bb.0: 4172 ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00] 4173 ; GENERIC-NEXT: retq # sched: [1:1.00] 4174 ; 4175 ; SKX-LABEL: sext_8x32mem_to_8x64: 4176 ; SKX: # %bb.0: 4177 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00] 4178 ; SKX-NEXT: retq # sched: [7:1.00] 4179 %a = load <8 x i32>,<8 x i32> *%i,align 1 4180 %x = sext <8 x i32> %a to <8 x i64> 4181 ret <8 x i64> %x 4182 } 4183 4184 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { 4185 ; GENERIC-LABEL: sext_8x32_to_8x64: 4186 ; GENERIC: # %bb.0: 4187 ; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00] 4188 ; GENERIC-NEXT: retq # sched: [1:1.00] 4189 ; 4190 ; SKX-LABEL: sext_8x32_to_8x64: 4191 ; SKX: # %bb.0: 4192 ; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00] 4193 ; SKX-NEXT: retq # sched: [7:1.00] 4194 %x = sext <8 x i32> %a to <8 x i64> 4195 ret <8 x i64> %x 4196 } 4197 4198 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { 4199 ; GENERIC-LABEL: zext_8x32_to_8x64mask: 4200 ; GENERIC: # %bb.0: 4201 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 4202 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 4203 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00] 4204 ; GENERIC-NEXT: retq # sched: [1:1.00] 4205 ; 4206 ; SKX-LABEL: zext_8x32_to_8x64mask: 4207 ; SKX: # %bb.0: 4208 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 4209 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 4210 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00] 4211 ; SKX-NEXT: retq # sched: [7:1.00] 4212 %x = zext <8 x i32> %a to <8 x i64> 
4213 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4214 ret <8 x i64> %ret 4215 } 4216 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { 4217 ; GENERIC-LABEL: fptrunc_test: 4218 ; GENERIC: # %bb.0: 4219 ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] 4220 ; GENERIC-NEXT: retq # sched: [1:1.00] 4221 ; 4222 ; SKX-LABEL: fptrunc_test: 4223 ; SKX: # %bb.0: 4224 ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] 4225 ; SKX-NEXT: retq # sched: [7:1.00] 4226 %b = fptrunc <8 x double> %a to <8 x float> 4227 ret <8 x float> %b 4228 } 4229 4230 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { 4231 ; GENERIC-LABEL: fpext_test: 4232 ; GENERIC: # %bb.0: 4233 ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] 4234 ; GENERIC-NEXT: retq # sched: [1:1.00] 4235 ; 4236 ; SKX-LABEL: fpext_test: 4237 ; SKX: # %bb.0: 4238 ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] 4239 ; SKX-NEXT: retq # sched: [7:1.00] 4240 %b = fpext <8 x float> %a to <8 x double> 4241 ret <8 x double> %b 4242 } 4243 4244 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { 4245 ; GENERIC-LABEL: zext_16i1_to_16xi32: 4246 ; GENERIC: # %bb.0: 4247 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 4248 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 4249 ; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 4250 ; GENERIC-NEXT: retq # sched: [1:1.00] 4251 ; 4252 ; SKX-LABEL: zext_16i1_to_16xi32: 4253 ; SKX: # %bb.0: 4254 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 4255 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 4256 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 4257 ; SKX-NEXT: retq # sched: [7:1.00] 4258 %a = bitcast i16 %b to <16 x i1> 4259 %c = zext <16 x i1> %a to <16 x i32> 4260 ret <16 x i32> %c 4261 } 4262 4263 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { 4264 ; GENERIC-LABEL: zext_8i1_to_8xi64: 4265 ; GENERIC: # %bb.0: 4266 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 4267 ; GENERIC-NEXT: 
vpmovm2q %k0, %zmm0 # sched: [1:0.33] 4268 ; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] 4269 ; GENERIC-NEXT: retq # sched: [1:1.00] 4270 ; 4271 ; SKX-LABEL: zext_8i1_to_8xi64: 4272 ; SKX: # %bb.0: 4273 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 4274 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 4275 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] 4276 ; SKX-NEXT: retq # sched: [7:1.00] 4277 %a = bitcast i8 %b to <8 x i1> 4278 %c = zext <8 x i1> %a to <8 x i64> 4279 ret <8 x i64> %c 4280 } 4281 4282 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { 4283 ; GENERIC-LABEL: trunc_16i8_to_16i1: 4284 ; GENERIC: # %bb.0: 4285 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 4286 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 4287 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4288 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4289 ; GENERIC-NEXT: retq # sched: [1:1.00] 4290 ; 4291 ; SKX-LABEL: trunc_16i8_to_16i1: 4292 ; SKX: # %bb.0: 4293 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 4294 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 4295 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4296 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 4297 ; SKX-NEXT: retq # sched: [7:1.00] 4298 %mask_b = trunc <16 x i8>%a to <16 x i1> 4299 %mask = bitcast <16 x i1> %mask_b to i16 4300 ret i16 %mask 4301 } 4302 4303 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { 4304 ; GENERIC-LABEL: trunc_16i32_to_16i1: 4305 ; GENERIC: # %bb.0: 4306 ; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] 4307 ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 4308 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4309 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4310 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4311 ; GENERIC-NEXT: retq # sched: [1:1.00] 4312 ; 4313 ; SKX-LABEL: trunc_16i32_to_16i1: 4314 ; SKX: # %bb.0: 4315 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] 4316 ; SKX-NEXT: vpmovd2m %zmm0, %k0 # 
sched: [1:1.00] 4317 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4318 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 4319 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 4320 ; SKX-NEXT: retq # sched: [7:1.00] 4321 %mask_b = trunc <16 x i32>%a to <16 x i1> 4322 %mask = bitcast <16 x i1> %mask_b to i16 4323 ret i16 %mask 4324 } 4325 4326 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { 4327 ; GENERIC-LABEL: trunc_4i32_to_4i1: 4328 ; GENERIC: # %bb.0: 4329 ; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4330 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4331 ; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00] 4332 ; GENERIC-NEXT: retq # sched: [1:1.00] 4333 ; 4334 ; SKX-LABEL: trunc_4i32_to_4i1: 4335 ; SKX: # %bb.0: 4336 ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4337 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4338 ; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50] 4339 ; SKX-NEXT: retq # sched: [7:1.00] 4340 %mask_a = trunc <4 x i32>%a to <4 x i1> 4341 %mask_b = trunc <4 x i32>%b to <4 x i1> 4342 %a_and_b = and <4 x i1>%mask_a, %mask_b 4343 %res = sext <4 x i1>%a_and_b to <4 x i32> 4344 ret <4 x i32>%res 4345 } 4346 4347 4348 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { 4349 ; GENERIC-LABEL: trunc_8i16_to_8i1: 4350 ; GENERIC: # %bb.0: 4351 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4352 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 4353 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4354 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax 4355 ; GENERIC-NEXT: retq # sched: [1:1.00] 4356 ; 4357 ; SKX-LABEL: trunc_8i16_to_8i1: 4358 ; SKX: # %bb.0: 4359 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4360 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 4361 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4362 ; SKX-NEXT: # kill: def $al killed $al killed $eax 4363 ; SKX-NEXT: retq # sched: [7:1.00] 4364 %mask_b = trunc <8 x i16>%a to <8 x i1> 4365 %mask = 
bitcast <8 x i1> %mask_b to i8 4366 ret i8 %mask 4367 } 4368 4369 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4370 ; GENERIC-LABEL: sext_8i1_8i32: 4371 ; GENERIC: # %bb.0: 4372 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 4373 ; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50] 4374 ; GENERIC-NEXT: retq # sched: [1:1.00] 4375 ; 4376 ; SKX-LABEL: sext_8i1_8i32: 4377 ; SKX: # %bb.0: 4378 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 4379 ; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33] 4380 ; SKX-NEXT: retq # sched: [7:1.00] 4381 %x = icmp slt <8 x i32> %a1, %a2 4382 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 4383 %y = sext <8 x i1> %x1 to <8 x i32> 4384 ret <8 x i32> %y 4385 } 4386 4387 4388 define i16 @trunc_i32_to_i1(i32 %a) { 4389 ; GENERIC-LABEL: trunc_i32_to_i1: 4390 ; GENERIC: # %bb.0: 4391 ; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33] 4392 ; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33] 4393 ; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00] 4394 ; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00] 4395 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 4396 ; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33] 4397 ; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33] 4398 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4399 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4400 ; GENERIC-NEXT: retq # sched: [1:1.00] 4401 ; 4402 ; SKX-LABEL: trunc_i32_to_i1: 4403 ; SKX: # %bb.0: 4404 ; SKX-NEXT: movw $-4, %ax # sched: [1:0.25] 4405 ; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00] 4406 ; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00] 4407 ; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00] 4408 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 4409 ; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00] 4410 ; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] 4411 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4412 ; 
SKX-NEXT: # kill: def $ax killed $ax killed $eax 4413 ; SKX-NEXT: retq # sched: [7:1.00] 4414 %a_i = trunc i32 %a to i1 4415 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 4416 %res = bitcast <16 x i1> %maskv to i16 4417 ret i16 %res 4418 } 4419 4420 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4421 ; GENERIC-LABEL: sext_8i1_8i16: 4422 ; GENERIC: # %bb.0: 4423 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] 4424 ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] 4425 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4426 ; GENERIC-NEXT: retq # sched: [1:1.00] 4427 ; 4428 ; SKX-LABEL: sext_8i1_8i16: 4429 ; SKX: # %bb.0: 4430 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] 4431 ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] 4432 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 4433 ; SKX-NEXT: retq # sched: [7:1.00] 4434 %x = icmp slt <8 x i32> %a1, %a2 4435 %y = sext <8 x i1> %x to <8 x i16> 4436 ret <8 x i16> %y 4437 } 4438 4439 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { 4440 ; GENERIC-LABEL: sext_16i1_16i32: 4441 ; GENERIC: # %bb.0: 4442 ; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50] 4443 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 4444 ; GENERIC-NEXT: retq # sched: [1:1.00] 4445 ; 4446 ; SKX-LABEL: sext_16i1_16i32: 4447 ; SKX: # %bb.0: 4448 ; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] 4449 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 4450 ; SKX-NEXT: retq # sched: [7:1.00] 4451 %x = icmp slt <16 x i32> %a1, %a2 4452 %y = sext <16 x i1> %x to <16 x i32> 4453 ret <16 x i32> %y 4454 } 4455 4456 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4457 ; GENERIC-LABEL: sext_8i1_8i64: 4458 ; GENERIC: # %bb.0: 4459 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] 4460 ; 
GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] 4461 ; GENERIC-NEXT: retq # sched: [1:1.00] 4462 ; 4463 ; SKX-LABEL: sext_8i1_8i64: 4464 ; SKX: # %bb.0: 4465 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] 4466 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 4467 ; SKX-NEXT: retq # sched: [7:1.00] 4468 %x = icmp slt <8 x i32> %a1, %a2 4469 %y = sext <8 x i1> %x to <8 x i64> 4470 ret <8 x i64> %y 4471 } 4472 4473 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { 4474 ; GENERIC-LABEL: extload_v8i64: 4475 ; GENERIC: # %bb.0: 4476 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] 4477 ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] 4478 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4479 ; GENERIC-NEXT: retq # sched: [1:1.00] 4480 ; 4481 ; SKX-LABEL: extload_v8i64: 4482 ; SKX: # %bb.0: 4483 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] 4484 ; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] 4485 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 4486 ; SKX-NEXT: retq # sched: [7:1.00] 4487 %sign_load = load <8 x i8>, <8 x i8>* %a 4488 %c = sext <8 x i8> %sign_load to <8 x i64> 4489 store <8 x i64> %c, <8 x i64>* %res 4490 ret void 4491 } 4492 4493 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { 4494 ; GENERIC-LABEL: test21: 4495 ; GENERIC: # %bb.0: 4496 ; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] 4497 ; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33] 4498 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 4499 ; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00] 4500 ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50] 4501 ; GENERIC-NEXT: retq # sched: [1:1.00] 4502 ; 4503 ; SKX-LABEL: test21: 4504 ; SKX: # %bb.0: 4505 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] 4506 ; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] 4507 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 4508 ; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: 
[3:1.00] 4509 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33] 4510 ; SKX-NEXT: retq # sched: [7:1.00] 4511 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer 4512 ret <64 x i16> %ret 4513 } 4514 4515 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { 4516 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16: 4517 ; GENERIC: # %bb.0: 4518 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4519 ; GENERIC-NEXT: retq # sched: [1:1.00] 4520 ; 4521 ; SKX-LABEL: shuffle_zext_16x8_to_16x16: 4522 ; SKX: # %bb.0: 4523 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4524 ; SKX-NEXT: retq # sched: [7:1.00] 4525 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 4526 %2 = bitcast <32 x i8> %1 to <16 x i16> 4527 ret <16 x i16> %2 4528 } 4529 4530 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { 4531 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask: 4532 ; GENERIC: # %bb.0: 4533 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 4534 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 4535 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4536 ; GENERIC-NEXT: retq # sched: [1:1.00] 4537 ; 4538 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: 4539 ; SKX: # %bb.0: 4540 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 4541 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 4542 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4543 ; SKX-NEXT: retq # sched: [7:1.00] 4544 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 4545 %bc = bitcast <32 x i8> %x to <16 x i16> 4546 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer 4547 ret <16 x i16> %ret 4548 } 4549 4550 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { 4551 ; GENERIC-LABEL: zext_32x8_to_16x16: 4552 ; GENERIC: # %bb.0: 4553 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4554 ; GENERIC-NEXT: retq # sched: [1:1.00] 4555 ; 4556 ; SKX-LABEL: zext_32x8_to_16x16: 4557 ; SKX: # %bb.0: 4558 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4559 ; SKX-NEXT: retq # sched: [7:1.00] 4560 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> 4561 %2 = bitcast <32 x i8> %1 to <16 x i16> 4562 ret <16 x i16> %2 4563 } 4564 4565 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { 4566 ; GENERIC-LABEL: zext_32x8_to_8x32: 4567 ; GENERIC: # %bb.0: 4568 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 4569 ; GENERIC-NEXT: retq # sched: [1:1.00] 4570 ; 4571 ; SKX-LABEL: zext_32x8_to_8x32: 4572 ; SKX: # %bb.0: 4573 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4574 ; SKX-NEXT: retq # sched: [7:1.00] 4575 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> 4576 %2 = bitcast <32 x i8> %1 to <8 x i32> 4577 ret <8 x i32> %2 4578 } 4579 4580 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { 4581 ; GENERIC-LABEL: zext_32x8_to_4x64: 4582 ; GENERIC: # %bb.0: 4583 ; GENERIC-NEXT: vpmovzxbq 
{{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4584 ; GENERIC-NEXT: retq # sched: [1:1.00] 4585 ; 4586 ; SKX-LABEL: zext_32x8_to_4x64: 4587 ; SKX: # %bb.0: 4588 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4589 ; SKX-NEXT: retq # sched: [7:1.00] 4590 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 4591 %2 = bitcast <32 x i8> %1 to <4 x i64> 4592 ret <4 x i64> %2 4593 } 4594 4595 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { 4596 ; GENERIC-LABEL: zext_16x16_to_8x32: 4597 ; GENERIC: # %bb.0: 4598 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4599 ; GENERIC-NEXT: retq # sched: [1:1.00] 4600 ; 4601 ; SKX-LABEL: zext_16x16_to_8x32: 4602 ; SKX: # %bb.0: 4603 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4604 ; SKX-NEXT: retq # sched: [7:1.00] 4605 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> 4606 %2 = bitcast <16 x i16> %1 to <8 x i32> 4607 ret <8 x i32> %2 4608 } 4609 4610 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { 4611 ; GENERIC-LABEL: zext_16x16_to_4x64: 4612 ; GENERIC: # 
%bb.0: 4613 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4614 ; GENERIC-NEXT: retq # sched: [1:1.00] 4615 ; 4616 ; SKX-LABEL: zext_16x16_to_4x64: 4617 ; SKX: # %bb.0: 4618 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4619 ; SKX-NEXT: retq # sched: [7:1.00] 4620 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> 4621 %2 = bitcast <16 x i16> %1 to <4 x i64> 4622 ret <4 x i64> %2 4623 } 4624 4625 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { 4626 ; GENERIC-LABEL: zext_8x32_to_4x64: 4627 ; GENERIC: # %bb.0: 4628 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4629 ; GENERIC-NEXT: retq # sched: [1:1.00] 4630 ; 4631 ; SKX-LABEL: zext_8x32_to_4x64: 4632 ; SKX: # %bb.0: 4633 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4634 ; SKX-NEXT: retq # sched: [7:1.00] 4635 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> 4636 %2 = bitcast <8 x i32> %1 to <4 x i64> 4637 ret <4 x i64> %2 4638 } 4639 4640 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { 4641 ; GENERIC-LABEL: zext_64xi1_to_64xi8: 4642 ; GENERIC: # %bb.0: 4643 ; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50] 4644 ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50] 4645 ; GENERIC-NEXT: retq # sched: [1:1.00] 4646 ; 4647 ; SKX-LABEL: zext_64xi1_to_64xi8: 4648 ; SKX: # %bb.0: 4649 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00] 4650 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] 4651 ; 
SKX-NEXT: retq # sched: [7:1.00] 4652 %mask = icmp eq <64 x i8> %x, %y 4653 %1 = zext <64 x i1> %mask to <64 x i8> 4654 ret <64 x i8> %1 4655 } 4656 4657 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { 4658 ; GENERIC-LABEL: zext_32xi1_to_32xi16: 4659 ; GENERIC: # %bb.0: 4660 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50] 4661 ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] 4662 ; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] 4663 ; GENERIC-NEXT: retq # sched: [1:1.00] 4664 ; 4665 ; SKX-LABEL: zext_32xi1_to_32xi16: 4666 ; SKX: # %bb.0: 4667 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] 4668 ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] 4669 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] 4670 ; SKX-NEXT: retq # sched: [7:1.00] 4671 %mask = icmp eq <32 x i16> %x, %y 4672 %1 = zext <32 x i1> %mask to <32 x i16> 4673 ret <32 x i16> %1 4674 } 4675 4676 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { 4677 ; GENERIC-LABEL: zext_16xi1_to_16xi16: 4678 ; GENERIC: # %bb.0: 4679 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4680 ; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00] 4681 ; GENERIC-NEXT: retq # sched: [1:1.00] 4682 ; 4683 ; SKX-LABEL: zext_16xi1_to_16xi16: 4684 ; SKX: # %bb.0: 4685 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4686 ; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50] 4687 ; SKX-NEXT: retq # sched: [7:1.00] 4688 %mask = icmp eq <16 x i16> %x, %y 4689 %1 = zext <16 x i1> %mask to <16 x i16> 4690 ret <16 x i16> %1 4691 } 4692 4693 4694 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { 4695 ; GENERIC-LABEL: zext_32xi1_to_32xi8: 4696 ; GENERIC: # %bb.0: 4697 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50] 4698 ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50] 4699 ; GENERIC-NEXT: retq # sched: [1:1.00] 4700 ; 4701 ; SKX-LABEL: zext_32xi1_to_32xi8: 
4702 ; SKX: # %bb.0: 4703 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] 4704 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] 4705 ; SKX-NEXT: retq # sched: [7:1.00] 4706 %mask = icmp eq <32 x i16> %x, %y 4707 %1 = zext <32 x i1> %mask to <32 x i8> 4708 ret <32 x i8> %1 4709 } 4710 4711 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { 4712 ; GENERIC-LABEL: zext_4xi1_to_4x32: 4713 ; GENERIC: # %bb.0: 4714 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] 4715 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4716 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4717 ; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4718 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] 4719 ; GENERIC-NEXT: retq # sched: [1:1.00] 4720 ; 4721 ; SKX-LABEL: zext_4xi1_to_4x32: 4722 ; SKX: # %bb.0: 4723 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] 4724 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4725 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4726 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4727 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] 4728 ; SKX-NEXT: retq # sched: [7:1.00] 4729 %mask = icmp eq <4 x i8> %x, %y 4730 %1 = zext <4 x i1> %mask to <4 x i32> 4731 ret <4 x i32> %1 4732 } 4733 4734 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { 4735 ; GENERIC-LABEL: zext_2xi1_to_2xi64: 4736 ; GENERIC: # %bb.0: 4737 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] 4738 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4739 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4740 ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4741 ; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00] 4742 ; GENERIC-NEXT: retq # sched: [1:1.00] 4743 ; 4744 ; SKX-LABEL: 
zext_2xi1_to_2xi64: 4745 ; SKX: # %bb.0: 4746 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] 4747 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4748 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4749 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4750 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50] 4751 ; SKX-NEXT: retq # sched: [7:1.00] 4752 %mask = icmp eq <2 x i8> %x, %y 4753 %1 = zext <2 x i1> %mask to <2 x i64> 4754 ret <2 x i64> %1 4755 } 4756 4757 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4758 ; GENERIC-LABEL: test_x86_fmadd_ps_z: 4759 ; GENERIC: # %bb.0: 4760 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4761 ; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4762 ; GENERIC-NEXT: retq # sched: [1:1.00] 4763 ; 4764 ; SKX-LABEL: test_x86_fmadd_ps_z: 4765 ; SKX: # %bb.0: 4766 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4767 ; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4768 ; SKX-NEXT: retq # sched: [7:1.00] 4769 %x = fmul <16 x float> %a0, %a1 4770 %res = fadd <16 x float> %x, %a2 4771 ret <16 x float> %res 4772 } 4773 4774 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4775 ; GENERIC-LABEL: test_x86_fmsub_ps_z: 4776 ; GENERIC: # %bb.0: 4777 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4778 ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4779 ; GENERIC-NEXT: retq # sched: [1:1.00] 4780 ; 4781 ; SKX-LABEL: test_x86_fmsub_ps_z: 4782 ; SKX: # %bb.0: 4783 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4784 ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4785 ; SKX-NEXT: retq # sched: [7:1.00] 4786 %x = fmul <16 x float> %a0, %a1 4787 %res = fsub <16 x float> %x, %a2 4788 ret <16 x float> %res 4789 } 4790 4791 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> 
%a1, <16 x float> %a2) { 4792 ; GENERIC-LABEL: test_x86_fnmadd_ps_z: 4793 ; GENERIC: # %bb.0: 4794 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4795 ; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00] 4796 ; GENERIC-NEXT: retq # sched: [1:1.00] 4797 ; 4798 ; SKX-LABEL: test_x86_fnmadd_ps_z: 4799 ; SKX: # %bb.0: 4800 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4801 ; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] 4802 ; SKX-NEXT: retq # sched: [7:1.00] 4803 %x = fmul <16 x float> %a0, %a1 4804 %res = fsub <16 x float> %a2, %x 4805 ret <16 x float> %res 4806 } 4807 4808 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4809 ; GENERIC-LABEL: test_x86_fnmsub_ps_z: 4810 ; GENERIC: # %bb.0: 4811 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4812 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 4813 ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4814 ; GENERIC-NEXT: retq # sched: [1:1.00] 4815 ; 4816 ; SKX-LABEL: test_x86_fnmsub_ps_z: 4817 ; SKX: # %bb.0: 4818 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4819 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 4820 ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4821 ; SKX-NEXT: retq # sched: [7:1.00] 4822 %x = fmul <16 x float> %a0, %a1 4823 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4824 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4825 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4826 float -0.000000e+00>, %x 4827 %res = fsub <16 x float> %y, %a2 4828 ret <16 x float> %res 4829 } 4830 4831 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { 4832 ; GENERIC-LABEL: test_x86_fmadd_pd_z: 4833 
; GENERIC: # %bb.0: 4834 ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4835 ; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4836 ; GENERIC-NEXT: retq # sched: [1:1.00] 4837 ; 4838 ; SKX-LABEL: test_x86_fmadd_pd_z: 4839 ; SKX: # %bb.0: 4840 ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4841 ; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4842 ; SKX-NEXT: retq # sched: [7:1.00] 4843 %x = fmul <8 x double> %a0, %a1 4844 %res = fadd <8 x double> %x, %a2 4845 ret <8 x double> %res 4846 } 4847 4848 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { 4849 ; GENERIC-LABEL: test_x86_fmsub_pd_z: 4850 ; GENERIC: # %bb.0: 4851 ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4852 ; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4853 ; GENERIC-NEXT: retq # sched: [1:1.00] 4854 ; 4855 ; SKX-LABEL: test_x86_fmsub_pd_z: 4856 ; SKX: # %bb.0: 4857 ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4858 ; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4859 ; SKX-NEXT: retq # sched: [7:1.00] 4860 %x = fmul <8 x double> %a0, %a1 4861 %res = fsub <8 x double> %x, %a2 4862 ret <8 x double> %res 4863 } 4864 4865 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { 4866 ; GENERIC-LABEL: test_x86_fmsub_213: 4867 ; GENERIC: # %bb.0: 4868 ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4869 ; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] 4870 ; GENERIC-NEXT: retq # sched: [1:1.00] 4871 ; 4872 ; SKX-LABEL: test_x86_fmsub_213: 4873 ; SKX: # %bb.0: 4874 ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4875 ; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] 4876 ; SKX-NEXT: retq # sched: [7:1.00] 4877 %x = fmul double %a0, %a1 4878 %res = fsub double %x, %a2 4879 ret double %res 4880 } 4881 4882 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { 4883 ; GENERIC-LABEL: 
test_x86_fmsub_213_m: 4884 ; GENERIC: # %bb.0: 4885 ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4886 ; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 4887 ; GENERIC-NEXT: retq # sched: [1:1.00] 4888 ; 4889 ; SKX-LABEL: test_x86_fmsub_213_m: 4890 ; SKX: # %bb.0: 4891 ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4892 ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4893 ; SKX-NEXT: retq # sched: [7:1.00] 4894 %a2 = load double , double *%a2_ptr 4895 %x = fmul double %a0, %a1 4896 %res = fsub double %x, %a2 4897 ret double %res 4898 } 4899 4900 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { 4901 ; GENERIC-LABEL: test_x86_fmsub_231_m: 4902 ; GENERIC: # %bb.0: 4903 ; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4904 ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4905 ; GENERIC-NEXT: retq # sched: [1:1.00] 4906 ; 4907 ; SKX-LABEL: test_x86_fmsub_231_m: 4908 ; SKX: # %bb.0: 4909 ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4910 ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4911 ; SKX-NEXT: retq # sched: [7:1.00] 4912 %a2 = load double , double *%a2_ptr 4913 %x = fmul double %a0, %a2 4914 %res = fsub double %x, %a1 4915 ret double %res 4916 } 4917 4918 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { 4919 ; GENERIC-LABEL: test231_br: 4920 ; GENERIC: # %bb.0: 4921 ; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00] 4922 ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] 4923 ; GENERIC-NEXT: retq # sched: [1:1.00] 4924 ; 4925 ; SKX-LABEL: test231_br: 4926 ; SKX: # %bb.0: 4927 ; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] 4928 ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4929 ; SKX-NEXT: retq # sched: [7:1.00] 4930 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 
0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> 4931 %b2 = fadd <16 x float> %b1, %a2 4932 ret <16 x float> %b2 4933 } 4934 4935 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { 4936 ; GENERIC-LABEL: test213_br: 4937 ; GENERIC: # %bb.0: 4938 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4939 ; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] 4940 ; GENERIC-NEXT: retq # sched: [1:1.00] 4941 ; 4942 ; SKX-LABEL: test213_br: 4943 ; SKX: # %bb.0: 4944 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4945 ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] 4946 ; SKX-NEXT: retq # sched: [7:1.00] 4947 %b1 = fmul <16 x float> %a1, %a2 4948 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> 4949 ret <16 x float> %b2 4950 } 4951 4952 ;mask (a*c+b , a) 4953 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { 4954 ; GENERIC-LABEL: test_x86_fmadd132_ps: 4955 ; GENERIC: # %bb.0: 4956 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 4957 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 4958 ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00] 4959 ; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] 4960 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 4961 ; 4962 ; SKX-LABEL: test_x86_fmadd132_ps: 4963 ; SKX: # %bb.0: 4964 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 4965 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 4966 ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] 4967 ; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] 4968 ; SKX-NEXT: retq # sched: [7:1.00] 4969 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 4970 %x = fmul <16 x float> %a0, %a2 4971 %y = fadd <16 x float> %x, %a1 4972 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 4973 ret <16 x float> %res 4974 } 4975 4976 ;mask (a*c+b , b) 4977 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { 4978 ; GENERIC-LABEL: test_x86_fmadd231_ps: 4979 ; GENERIC: # %bb.0: 4980 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 4981 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 4982 ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00] 4983 ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] 4984 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 4985 ; GENERIC-NEXT: retq # sched: [1:1.00] 4986 ; 4987 ; SKX-LABEL: test_x86_fmadd231_ps: 4988 ; SKX: # %bb.0: 4989 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 4990 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 4991 ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] 4992 ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] 4993 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 4994 ; SKX-NEXT: retq # sched: [7:1.00] 4995 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 4996 %x = fmul <16 x float> %a0, %a2 4997 %y = fadd <16 x float> %x, %a1 4998 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 4999 ret <16 x float> %res 5000 } 5001 5002 ;mask (b*a+c , b) 5003 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, 
<16 x i1> %mask) { 5004 ; GENERIC-LABEL: test_x86_fmadd213_ps: 5005 ; GENERIC: # %bb.0: 5006 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 5007 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 5008 ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] 5009 ; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00] 5010 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 5011 ; GENERIC-NEXT: retq # sched: [1:1.00] 5012 ; 5013 ; SKX-LABEL: test_x86_fmadd213_ps: 5014 ; SKX: # %bb.0: 5015 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 5016 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 5017 ; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 5018 ; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] 5019 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 5020 ; SKX-NEXT: retq # sched: [7:1.00] 5021 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 5022 %x = fmul <16 x float> %a1, %a0 5023 %y = fadd <16 x float> %x, %a2 5024 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 5025 ret <16 x float> %res 5026 } 5027 5028 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5029 ; GENERIC-LABEL: vpandd: 5030 ; GENERIC: # %bb.0: # %entry 5031 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5032 ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5033 ; GENERIC-NEXT: retq # sched: [1:1.00] 5034 ; 5035 ; SKX-LABEL: vpandd: 5036 ; SKX: # %bb.0: # %entry 5037 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5038 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5039 ; SKX-NEXT: retq # sched: [7:1.00] 5040 entry: 5041 ; Force the execution domain with an add. 
5042 %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, 5043 i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5044 %x = and <16 x i32> %a2, %b 5045 ret <16 x i32> %x 5046 } 5047 5048 define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5049 ; GENERIC-LABEL: vpandnd: 5050 ; GENERIC: # %bb.0: # %entry 5051 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5052 ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] 5053 ; GENERIC-NEXT: retq # sched: [1:1.00] 5054 ; 5055 ; SKX-LABEL: vpandnd: 5056 ; SKX: # %bb.0: # %entry 5057 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5058 ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5059 ; SKX-NEXT: retq # sched: [7:1.00] 5060 entry: 5061 ; Force the execution domain with an add. 5062 %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 5063 i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 5064 %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, 5065 i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5066 %x = and <16 x i32> %a2, %b2 5067 ret <16 x i32> %x 5068 } 5069 5070 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5071 ; GENERIC-LABEL: vpord: 5072 ; GENERIC: # %bb.0: # %entry 5073 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5074 ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5075 ; GENERIC-NEXT: retq # sched: [1:1.00] 5076 ; 5077 ; SKX-LABEL: vpord: 5078 ; SKX: # %bb.0: # %entry 5079 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5080 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5081 ; SKX-NEXT: retq # sched: [7:1.00] 5082 entry: 5083 ; Force the execution domain with an add. 
5084 %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, 5085 i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 5086 %x = or <16 x i32> %a2, %b 5087 ret <16 x i32> %x 5088 } 5089 5090 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5091 ; GENERIC-LABEL: vpxord: 5092 ; GENERIC: # %bb.0: # %entry 5093 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5094 ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5095 ; GENERIC-NEXT: retq # sched: [1:1.00] 5096 ; 5097 ; SKX-LABEL: vpxord: 5098 ; SKX: # %bb.0: # %entry 5099 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5100 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5101 ; SKX-NEXT: retq # sched: [7:1.00] 5102 entry: 5103 ; Force the execution domain with an add. 5104 %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, 5105 i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 5106 %x = xor <16 x i32> %a2, %b 5107 ret <16 x i32> %x 5108 } 5109 5110 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5111 ; GENERIC-LABEL: vpandq: 5112 ; GENERIC: # %bb.0: # %entry 5113 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5114 ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5115 ; GENERIC-NEXT: retq # sched: [1:1.00] 5116 ; 5117 ; SKX-LABEL: vpandq: 5118 ; SKX: # %bb.0: # %entry 5119 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5120 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5121 ; SKX-NEXT: retq # sched: [7:1.00] 5122 entry: 5123 ; Force the execution domain with an add. 
5124 %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6> 5125 %x = and <8 x i64> %a2, %b 5126 ret <8 x i64> %x 5127 } 5128 5129 define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5130 ; GENERIC-LABEL: vpandnq: 5131 ; GENERIC: # %bb.0: # %entry 5132 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5133 ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] 5134 ; GENERIC-NEXT: retq # sched: [1:1.00] 5135 ; 5136 ; SKX-LABEL: vpandnq: 5137 ; SKX: # %bb.0: # %entry 5138 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5139 ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5140 ; SKX-NEXT: retq # sched: [7:1.00] 5141 entry: 5142 ; Force the execution domain with an add. 5143 %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> 5144 %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5145 %x = and <8 x i64> %a2, %b2 5146 ret <8 x i64> %x 5147 } 5148 5149 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5150 ; GENERIC-LABEL: vporq: 5151 ; GENERIC: # %bb.0: # %entry 5152 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5153 ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5154 ; GENERIC-NEXT: retq # sched: [1:1.00] 5155 ; 5156 ; SKX-LABEL: vporq: 5157 ; SKX: # %bb.0: # %entry 5158 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5159 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5160 ; SKX-NEXT: retq # sched: [7:1.00] 5161 entry: 5162 ; Force the execution domain with an add. 
5163 %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8> 5164 %x = or <8 x i64> %a2, %b 5165 ret <8 x i64> %x 5166 } 5167 5168 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5169 ; GENERIC-LABEL: vpxorq: 5170 ; GENERIC: # %bb.0: # %entry 5171 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5172 ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5173 ; GENERIC-NEXT: retq # sched: [1:1.00] 5174 ; 5175 ; SKX-LABEL: vpxorq: 5176 ; SKX: # %bb.0: # %entry 5177 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5178 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5179 ; SKX-NEXT: retq # sched: [7:1.00] 5180 entry: 5181 ; Force the execution domain with an add. 5182 %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9> 5183 %x = xor <8 x i64> %a2, %b 5184 ret <8 x i64> %x 5185 } 5186 5187 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { 5188 ; GENERIC-LABEL: and_v64i8: 5189 ; GENERIC: # %bb.0: 5190 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5191 ; GENERIC-NEXT: retq # sched: [1:1.00] 5192 ; 5193 ; SKX-LABEL: and_v64i8: 5194 ; SKX: # %bb.0: 5195 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5196 ; SKX-NEXT: retq # sched: [7:1.00] 5197 %res = and <64 x i8> %a, %b 5198 ret <64 x i8> %res 5199 } 5200 5201 define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { 5202 ; GENERIC-LABEL: andn_v64i8: 5203 ; GENERIC: # %bb.0: 5204 ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 5205 ; GENERIC-NEXT: retq # sched: [1:1.00] 5206 ; 5207 ; SKX-LABEL: andn_v64i8: 5208 ; SKX: # %bb.0: 5209 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5210 ; SKX-NEXT: retq # sched: [7:1.00] 5211 %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5212 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5213 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5214 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 5215 %res = and <64 x i8> %a, %b2 5216 ret <64 x i8> %res 5217 } 5218 5219 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { 5220 ; GENERIC-LABEL: or_v64i8: 5221 ; GENERIC: # %bb.0: 5222 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5223 ; GENERIC-NEXT: retq # sched: [1:1.00] 5224 ; 5225 ; SKX-LABEL: or_v64i8: 5226 ; SKX: # %bb.0: 5227 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5228 ; SKX-NEXT: retq # sched: [7:1.00] 5229 %res = or <64 x i8> %a, %b 5230 ret <64 x i8> %res 5231 } 5232 5233 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { 5234 ; GENERIC-LABEL: xor_v64i8: 5235 ; GENERIC: # %bb.0: 5236 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5237 ; GENERIC-NEXT: retq # sched: [1:1.00] 5238 ; 5239 ; SKX-LABEL: xor_v64i8: 5240 ; SKX: # %bb.0: 5241 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5242 ; SKX-NEXT: retq # sched: [7:1.00] 5243 %res = xor <64 x i8> %a, %b 5244 ret <64 x i8> %res 5245 } 5246 5247 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { 5248 ; GENERIC-LABEL: and_v32i16: 5249 ; GENERIC: # %bb.0: 5250 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5251 ; GENERIC-NEXT: retq # sched: [1:1.00] 5252 ; 5253 ; SKX-LABEL: and_v32i16: 5254 ; SKX: # %bb.0: 5255 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5256 ; SKX-NEXT: retq # sched: [7:1.00] 5257 %res = and <32 x i16> %a, %b 5258 ret <32 x i16> %res 5259 } 5260 5261 define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { 5262 ; GENERIC-LABEL: andn_v32i16: 5263 ; GENERIC: # %bb.0: 5264 ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 5265 ; GENERIC-NEXT: retq # sched: [1:1.00] 5266 ; 5267 ; 
SKX-LABEL: andn_v32i16: 5268 ; SKX: # %bb.0: 5269 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5270 ; SKX-NEXT: retq # sched: [7:1.00] 5271 %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, 5272 i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 5273 %res = and <32 x i16> %a, %b2 5274 ret <32 x i16> %res 5275 } 5276 5277 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { 5278 ; GENERIC-LABEL: or_v32i16: 5279 ; GENERIC: # %bb.0: 5280 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5281 ; GENERIC-NEXT: retq # sched: [1:1.00] 5282 ; 5283 ; SKX-LABEL: or_v32i16: 5284 ; SKX: # %bb.0: 5285 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5286 ; SKX-NEXT: retq # sched: [7:1.00] 5287 %res = or <32 x i16> %a, %b 5288 ret <32 x i16> %res 5289 } 5290 5291 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { 5292 ; GENERIC-LABEL: xor_v32i16: 5293 ; GENERIC: # %bb.0: 5294 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5295 ; GENERIC-NEXT: retq # sched: [1:1.00] 5296 ; 5297 ; SKX-LABEL: xor_v32i16: 5298 ; SKX: # %bb.0: 5299 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5300 ; SKX-NEXT: retq # sched: [7:1.00] 5301 %res = xor <32 x i16> %a, %b 5302 ret <32 x i16> %res 5303 } 5304 5305 define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5306 ; GENERIC-LABEL: masked_and_v16f32: 5307 ; GENERIC: # %bb.0: 5308 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5309 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5310 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5311 ; GENERIC-NEXT: retq # sched: [1:1.00] 5312 ; 5313 ; SKX-LABEL: masked_and_v16f32: 5314 ; SKX: # %bb.0: 5315 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5316 ; 
SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5317 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5318 ; SKX-NEXT: retq # sched: [7:1.00] 5319 %a1 = bitcast <16 x float> %a to <16 x i32> 5320 %b1 = bitcast <16 x float> %b to <16 x i32> 5321 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5322 %mask1 = bitcast i16 %mask to <16 x i1> 5323 %op = and <16 x i32> %a1, %b1 5324 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5325 %cast = bitcast <16 x i32> %select to <16 x float> 5326 %add = fadd <16 x float> %c, %cast 5327 ret <16 x float> %add 5328 } 5329 ; Merge-masked bitwise OR of the integer-cast lanes of %a and %b (fallback %passThru), then fadd with %c. 5330 define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5331 ; GENERIC-LABEL: masked_or_v16f32: 5332 ; GENERIC: # %bb.0: 5333 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5334 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5335 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5336 ; GENERIC-NEXT: retq # sched: [1:1.00] 5337 ; 5338 ; SKX-LABEL: masked_or_v16f32: 5339 ; SKX: # %bb.0: 5340 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5341 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5342 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5343 ; SKX-NEXT: retq # sched: [7:1.00] 5344 %a1 = bitcast <16 x float> %a to <16 x i32> 5345 %b1 = bitcast <16 x float> %b to <16 x i32> 5346 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5347 %mask1 = bitcast i16 %mask to <16 x i1> 5348 %op = or <16 x i32> %a1, %b1 ; the operation under test: bitwise OR 5349 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5350 %cast = bitcast <16 x i32> %select to <16 x float> 5351 %add = fadd <16 x float> %c, %cast 5352 ret <16 x float> %add 5353 } 5354 5355 define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5356 ; GENERIC-LABEL: masked_xor_v16f32: 5357 ; GENERIC: # %bb.0: 5358 ; 
GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5359 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5360 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5361 ; GENERIC-NEXT: retq # sched: [1:1.00] 5362 ; 5363 ; SKX-LABEL: masked_xor_v16f32: 5364 ; SKX: # %bb.0: 5365 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5366 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5367 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5368 ; SKX-NEXT: retq # sched: [7:1.00] 5369 %a1 = bitcast <16 x float> %a to <16 x i32> 5370 %b1 = bitcast <16 x float> %b to <16 x i32> 5371 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5372 %mask1 = bitcast i16 %mask to <16 x i1> 5373 %op = xor <16 x i32> %a1, %b1 ; the operation under test: bitwise XOR 5374 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5375 %cast = bitcast <16 x i32> %select to <16 x float> 5376 %add = fadd <16 x float> %c, %cast 5377 ret <16 x float> %add 5378 } 5379 5380 define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5381 ; GENERIC-LABEL: masked_and_v8f64: 5382 ; GENERIC: # %bb.0: 5383 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5384 ; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5385 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5386 ; GENERIC-NEXT: retq # sched: [1:1.00] 5387 ; 5388 ; SKX-LABEL: masked_and_v8f64: 5389 ; SKX: # %bb.0: 5390 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5391 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5392 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5393 ; SKX-NEXT: retq # sched: [7:1.00] 5394 %a1 = bitcast <8 x double> %a to <8 x i64> 5395 %b1 = bitcast <8 x double> %b to <8 x i64> 5396 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5397 %mask1 = bitcast i8 %mask to <8 x i1> 5398 %op = and <8 x i64> %a1, %b1 5399 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 
5400 %cast = bitcast <8 x i64> %select to <8 x double> 5401 %add = fadd <8 x double> %c, %cast 5402 ret <8 x double> %add 5403 } 5404 ; Merge-masked bitwise OR of the integer-cast lanes of %a and %b (fallback %passThru), then fadd with %c. 5405 define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5406 ; GENERIC-LABEL: masked_or_v8f64: 5407 ; GENERIC: # %bb.0: 5408 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5409 ; GENERIC-NEXT: vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5410 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5411 ; GENERIC-NEXT: retq # sched: [1:1.00] 5412 ; 5413 ; SKX-LABEL: masked_or_v8f64: 5414 ; SKX: # %bb.0: 5415 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5416 ; SKX-NEXT: vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5417 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5418 ; SKX-NEXT: retq # sched: [7:1.00] 5419 %a1 = bitcast <8 x double> %a to <8 x i64> 5420 %b1 = bitcast <8 x double> %b to <8 x i64> 5421 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5422 %mask1 = bitcast i8 %mask to <8 x i1> 5423 %op = or <8 x i64> %a1, %b1 ; the operation under test: bitwise OR 5424 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 5425 %cast = bitcast <8 x i64> %select to <8 x double> 5426 %add = fadd <8 x double> %c, %cast 5427 ret <8 x double> %add 5428 } 5429 ; Merge-masked bitwise XOR of the integer-cast lanes of %a and %b (fallback %passThru), then fadd with %c. 5430 define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5431 ; GENERIC-LABEL: masked_xor_v8f64: 5432 ; GENERIC: # %bb.0: 5433 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5434 ; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5435 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5436 ; GENERIC-NEXT: retq # sched: [1:1.00] 5437 ; 5438 ; SKX-LABEL: masked_xor_v8f64: 5439 ; SKX: # %bb.0: 5440 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5441 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5442 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5443 ; SKX-NEXT: retq # sched: 
[7:1.00] 5444 %a1 = bitcast <8 x double> %a to <8 x i64> 5445 %b1 = bitcast <8 x double> %b to <8 x i64> 5446 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5447 %mask1 = bitcast i8 %mask to <8 x i1> 5448 %op = xor <8 x i64> %a1, %b1 ; the operation under test: bitwise XOR 5449 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 5450 %cast = bitcast <8 x i64> %select to <8 x double> 5451 %add = fadd <8 x double> %c, %cast 5452 ret <8 x double> %add 5453 } 5454 5455 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5456 ; GENERIC-LABEL: test_mm512_mask_and_epi32: 5457 ; GENERIC: # %bb.0: # %entry 5458 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5459 ; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5460 ; GENERIC-NEXT: retq # sched: [1:1.00] 5461 ; 5462 ; SKX-LABEL: test_mm512_mask_and_epi32: 5463 ; SKX: # %bb.0: # %entry 5464 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5465 ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5466 ; SKX-NEXT: retq # sched: [7:1.00] 5467 entry: 5468 %and1.i.i = and <8 x i64> %__a, %__b 5469 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> 5470 %1 = bitcast <8 x i64> %__src to <16 x i32> 5471 %2 = bitcast i16 %__k to <16 x i1> 5472 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5473 %4 = bitcast <16 x i32> %3 to <8 x i64> 5474 ret <8 x i64> %4 5475 } 5476 5477 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5478 ; GENERIC-LABEL: test_mm512_mask_or_epi32: 5479 ; GENERIC: # %bb.0: # %entry 5480 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5481 ; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5482 ; GENERIC-NEXT: retq # sched: [1:1.00] 5483 ; 5484 ; SKX-LABEL: test_mm512_mask_or_epi32: 5485 ; SKX: # %bb.0: # %entry 5486 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5487 ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5488 ; SKX-NEXT: retq # 
sched: [7:1.00] 5489 entry: 5490 %or1.i.i = or <8 x i64> %__a, %__b 5491 %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> 5492 %1 = bitcast <8 x i64> %__src to <16 x i32> 5493 %2 = bitcast i16 %__k to <16 x i1> 5494 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5495 %4 = bitcast <16 x i32> %3 to <8 x i64> 5496 ret <8 x i64> %4 5497 } 5498 5499 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5500 ; GENERIC-LABEL: test_mm512_mask_xor_epi32: 5501 ; GENERIC: # %bb.0: # %entry 5502 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5503 ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5504 ; GENERIC-NEXT: retq # sched: [1:1.00] 5505 ; 5506 ; SKX-LABEL: test_mm512_mask_xor_epi32: 5507 ; SKX: # %bb.0: # %entry 5508 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5509 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5510 ; SKX-NEXT: retq # sched: [7:1.00] 5511 entry: 5512 %xor1.i.i = xor <8 x i64> %__a, %__b 5513 %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> 5514 %1 = bitcast <8 x i64> %__src to <16 x i32> 5515 %2 = bitcast i16 %__k to <16 x i1> 5516 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5517 %4 = bitcast <16 x i32> %3 to <8 x i64> 5518 ret <8 x i64> %4 5519 } 5520 5521 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5522 ; GENERIC-LABEL: test_mm512_mask_xor_pd: 5523 ; GENERIC: # %bb.0: # %entry 5524 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5525 ; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5526 ; GENERIC-NEXT: retq # sched: [1:1.00] 5527 ; 5528 ; SKX-LABEL: test_mm512_mask_xor_pd: 5529 ; SKX: # %bb.0: # %entry 5530 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5531 ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5532 ; SKX-NEXT: retq # sched: [7:1.00] 5533 entry: 5534 %0 = bitcast <8 x double> %__A to <8 x i64> 5535 %1 = bitcast <8 x 
double> %__B to <8 x i64> 5536 %xor.i.i = xor <8 x i64> %0, %1 5537 %2 = bitcast <8 x i64> %xor.i.i to <8 x double> 5538 %3 = bitcast i8 %__U to <8 x i1> 5539 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5540 ret <8 x double> %4 5541 } 5542 5543 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5544 ; GENERIC-LABEL: test_mm512_maskz_xor_pd: 5545 ; GENERIC: # %bb.0: # %entry 5546 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5547 ; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5548 ; GENERIC-NEXT: retq # sched: [1:1.00] 5549 ; 5550 ; SKX-LABEL: test_mm512_maskz_xor_pd: 5551 ; SKX: # %bb.0: # %entry 5552 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5553 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5554 ; SKX-NEXT: retq # sched: [7:1.00] 5555 entry: 5556 %0 = bitcast <8 x double> %__A to <8 x i64> 5557 %1 = bitcast <8 x double> %__B to <8 x i64> 5558 %xor.i.i = xor <8 x i64> %0, %1 5559 %2 = bitcast <8 x i64> %xor.i.i to <8 x double> 5560 %3 = bitcast i8 %__U to <8 x i1> 5561 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5562 ret <8 x double> %4 5563 } 5564 5565 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5566 ; GENERIC-LABEL: test_mm512_mask_xor_ps: 5567 ; GENERIC: # %bb.0: # %entry 5568 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5569 ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5570 ; GENERIC-NEXT: retq # sched: [1:1.00] 5571 ; 5572 ; SKX-LABEL: test_mm512_mask_xor_ps: 5573 ; SKX: # %bb.0: # %entry 5574 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5575 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5576 ; SKX-NEXT: retq # sched: [7:1.00] 5577 entry: 5578 %0 = bitcast <16 x float> %__A to <16 x i32> 5579 %1 = bitcast <16 x float> %__B to <16 x i32> 5580 %xor.i.i = xor <16 x i32> %0, %1 5581 %2 = bitcast <16 
x i32> %xor.i.i to <16 x float> 5582 %3 = bitcast i16 %__U to <16 x i1> 5583 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5584 ret <16 x float> %4 5585 } 5586 5587 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5588 ; GENERIC-LABEL: test_mm512_maskz_xor_ps: 5589 ; GENERIC: # %bb.0: # %entry 5590 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5591 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5592 ; GENERIC-NEXT: retq # sched: [1:1.00] 5593 ; 5594 ; SKX-LABEL: test_mm512_maskz_xor_ps: 5595 ; SKX: # %bb.0: # %entry 5596 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5597 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5598 ; SKX-NEXT: retq # sched: [7:1.00] 5599 entry: 5600 %0 = bitcast <16 x float> %__A to <16 x i32> 5601 %1 = bitcast <16 x float> %__B to <16 x i32> 5602 %xor.i.i = xor <16 x i32> %0, %1 5603 %2 = bitcast <16 x i32> %xor.i.i to <16 x float> 5604 %3 = bitcast i16 %__U to <16 x i1> 5605 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5606 ret <16 x float> %4 5607 } 5608 5609 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5610 ; GENERIC-LABEL: test_mm512_mask_or_pd: 5611 ; GENERIC: # %bb.0: # %entry 5612 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5613 ; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5614 ; GENERIC-NEXT: retq # sched: [1:1.00] 5615 ; 5616 ; SKX-LABEL: test_mm512_mask_or_pd: 5617 ; SKX: # %bb.0: # %entry 5618 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5619 ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5620 ; SKX-NEXT: retq # sched: [7:1.00] 5621 entry: 5622 %0 = bitcast <8 x double> %__A to <8 x i64> 5623 %1 = bitcast <8 x double> %__B to <8 x i64> 5624 %or.i.i = or <8 x i64> %1, %0 5625 %2 = bitcast <8 x i64> %or.i.i to <8 x double> 5626 %3 = bitcast i8 %__U to <8 x i1> 5627 %4 = 
select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5628 ret <8 x double> %4 5629 } 5630 5631 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5632 ; GENERIC-LABEL: test_mm512_maskz_or_pd: 5633 ; GENERIC: # %bb.0: # %entry 5634 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5635 ; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5636 ; GENERIC-NEXT: retq # sched: [1:1.00] 5637 ; 5638 ; SKX-LABEL: test_mm512_maskz_or_pd: 5639 ; SKX: # %bb.0: # %entry 5640 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5641 ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5642 ; SKX-NEXT: retq # sched: [7:1.00] 5643 entry: 5644 %0 = bitcast <8 x double> %__A to <8 x i64> 5645 %1 = bitcast <8 x double> %__B to <8 x i64> 5646 %or.i.i = or <8 x i64> %1, %0 5647 %2 = bitcast <8 x i64> %or.i.i to <8 x double> 5648 %3 = bitcast i8 %__U to <8 x i1> 5649 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5650 ret <8 x double> %4 5651 } 5652 5653 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5654 ; GENERIC-LABEL: test_mm512_mask_or_ps: 5655 ; GENERIC: # %bb.0: # %entry 5656 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5657 ; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5658 ; GENERIC-NEXT: retq # sched: [1:1.00] 5659 ; 5660 ; SKX-LABEL: test_mm512_mask_or_ps: 5661 ; SKX: # %bb.0: # %entry 5662 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5663 ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5664 ; SKX-NEXT: retq # sched: [7:1.00] 5665 entry: 5666 %0 = bitcast <16 x float> %__A to <16 x i32> 5667 %1 = bitcast <16 x float> %__B to <16 x i32> 5668 %or.i.i = or <16 x i32> %1, %0 5669 %2 = bitcast <16 x i32> %or.i.i to <16 x float> 5670 %3 = bitcast i16 %__U to <16 x i1> 5671 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5672 ret <16 x float> %4 5673 } 5674 5675 
define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5676 ; GENERIC-LABEL: test_mm512_maskz_or_ps: 5677 ; GENERIC: # %bb.0: # %entry 5678 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5679 ; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5680 ; GENERIC-NEXT: retq # sched: [1:1.00] 5681 ; 5682 ; SKX-LABEL: test_mm512_maskz_or_ps: 5683 ; SKX: # %bb.0: # %entry 5684 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5685 ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5686 ; SKX-NEXT: retq # sched: [7:1.00] 5687 entry: 5688 %0 = bitcast <16 x float> %__A to <16 x i32> 5689 %1 = bitcast <16 x float> %__B to <16 x i32> 5690 %or.i.i = or <16 x i32> %1, %0 5691 %2 = bitcast <16 x i32> %or.i.i to <16 x float> 5692 %3 = bitcast i16 %__U to <16 x i1> 5693 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5694 ret <16 x float> %4 5695 } 5696 5697 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5698 ; GENERIC-LABEL: test_mm512_mask_and_pd: 5699 ; GENERIC: # %bb.0: # %entry 5700 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5701 ; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5702 ; GENERIC-NEXT: retq # sched: [1:1.00] 5703 ; 5704 ; SKX-LABEL: test_mm512_mask_and_pd: 5705 ; SKX: # %bb.0: # %entry 5706 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5707 ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5708 ; SKX-NEXT: retq # sched: [7:1.00] 5709 entry: 5710 %0 = bitcast <8 x double> %__A to <8 x i64> 5711 %1 = bitcast <8 x double> %__B to <8 x i64> 5712 %and.i.i = and <8 x i64> %1, %0 5713 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5714 %3 = bitcast i8 %__U to <8 x i1> 5715 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5716 ret <8 x double> %4 5717 } 5718 5719 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x 
double> %__B) { 5720 ; GENERIC-LABEL: test_mm512_maskz_and_pd: 5721 ; GENERIC: # %bb.0: # %entry 5722 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5723 ; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5724 ; GENERIC-NEXT: retq # sched: [1:1.00] 5725 ; 5726 ; SKX-LABEL: test_mm512_maskz_and_pd: 5727 ; SKX: # %bb.0: # %entry 5728 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5729 ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5730 ; SKX-NEXT: retq # sched: [7:1.00] 5731 entry: 5732 %0 = bitcast <8 x double> %__A to <8 x i64> 5733 %1 = bitcast <8 x double> %__B to <8 x i64> 5734 %and.i.i = and <8 x i64> %1, %0 5735 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5736 %3 = bitcast i8 %__U to <8 x i1> 5737 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5738 ret <8 x double> %4 5739 } 5740 5741 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5742 ; GENERIC-LABEL: test_mm512_mask_and_ps: 5743 ; GENERIC: # %bb.0: # %entry 5744 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5745 ; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5746 ; GENERIC-NEXT: retq # sched: [1:1.00] 5747 ; 5748 ; SKX-LABEL: test_mm512_mask_and_ps: 5749 ; SKX: # %bb.0: # %entry 5750 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5751 ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5752 ; SKX-NEXT: retq # sched: [7:1.00] 5753 entry: 5754 %0 = bitcast <16 x float> %__A to <16 x i32> 5755 %1 = bitcast <16 x float> %__B to <16 x i32> 5756 %and.i.i = and <16 x i32> %1, %0 5757 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5758 %3 = bitcast i16 %__U to <16 x i1> 5759 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5760 ret <16 x float> %4 5761 } 5762 5763 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5764 ; GENERIC-LABEL: test_mm512_maskz_and_ps: 5765 ; GENERIC: 
# %bb.0: # %entry 5766 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5767 ; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5768 ; GENERIC-NEXT: retq # sched: [1:1.00] 5769 ; 5770 ; SKX-LABEL: test_mm512_maskz_and_ps: 5771 ; SKX: # %bb.0: # %entry 5772 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5773 ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5774 ; SKX-NEXT: retq # sched: [7:1.00] 5775 entry: 5776 %0 = bitcast <16 x float> %__A to <16 x i32> 5777 %1 = bitcast <16 x float> %__B to <16 x i32> 5778 %and.i.i = and <16 x i32> %1, %0 5779 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5780 %3 = bitcast i16 %__U to <16 x i1> 5781 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5782 ret <16 x float> %4 5783 } 5784 5785 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5786 ; GENERIC-LABEL: test_mm512_mask_andnot_pd: 5787 ; GENERIC: # %bb.0: # %entry 5788 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5789 ; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5790 ; GENERIC-NEXT: retq # sched: [1:1.00] 5791 ; 5792 ; SKX-LABEL: test_mm512_mask_andnot_pd: 5793 ; SKX: # %bb.0: # %entry 5794 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5795 ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5796 ; SKX-NEXT: retq # sched: [7:1.00] 5797 entry: 5798 %0 = bitcast <8 x double> %__A to <8 x i64> 5799 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5800 %1 = bitcast <8 x double> %__B to <8 x i64> 5801 %and.i.i = and <8 x i64> %1, %neg.i.i 5802 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5803 %3 = bitcast i8 %__U to <8 x i1> 5804 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5805 ret <8 x double> %4 5806 } 5807 5808 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5809 ; GENERIC-LABEL: 
test_mm512_maskz_andnot_pd: 5810 ; GENERIC: # %bb.0: # %entry 5811 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5812 ; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5813 ; GENERIC-NEXT: retq # sched: [1:1.00] 5814 ; 5815 ; SKX-LABEL: test_mm512_maskz_andnot_pd: 5816 ; SKX: # %bb.0: # %entry 5817 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5818 ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5819 ; SKX-NEXT: retq # sched: [7:1.00] 5820 entry: 5821 %0 = bitcast <8 x double> %__A to <8 x i64> 5822 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5823 %1 = bitcast <8 x double> %__B to <8 x i64> 5824 %and.i.i = and <8 x i64> %1, %neg.i.i 5825 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5826 %3 = bitcast i8 %__U to <8 x i1> 5827 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5828 ret <8 x double> %4 5829 } 5830 5831 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5832 ; GENERIC-LABEL: test_mm512_mask_andnot_ps: 5833 ; GENERIC: # %bb.0: # %entry 5834 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5835 ; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5836 ; GENERIC-NEXT: retq # sched: [1:1.00] 5837 ; 5838 ; SKX-LABEL: test_mm512_mask_andnot_ps: 5839 ; SKX: # %bb.0: # %entry 5840 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5841 ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5842 ; SKX-NEXT: retq # sched: [7:1.00] 5843 entry: 5844 %0 = bitcast <16 x float> %__A to <16 x i32> 5845 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5846 %1 = bitcast <16 x float> %__B to <16 x i32> 5847 %and.i.i = and <16 x i32> %1, %neg.i.i 5848 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5849 %3 = bitcast i16 %__U to <16 x i1> 5850 %4 = select 
<16 x i1> %3, <16 x float> %2, <16 x float> %__W 5851 ret <16 x float> %4 5852 } 5853 5854 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5855 ; GENERIC-LABEL: test_mm512_maskz_andnot_ps: 5856 ; GENERIC: # %bb.0: # %entry 5857 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5858 ; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5859 ; GENERIC-NEXT: retq # sched: [1:1.00] 5860 ; 5861 ; SKX-LABEL: test_mm512_maskz_andnot_ps: 5862 ; SKX: # %bb.0: # %entry 5863 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5864 ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5865 ; SKX-NEXT: retq # sched: [7:1.00] 5866 entry: 5867 %0 = bitcast <16 x float> %__A to <16 x i32> 5868 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5869 %1 = bitcast <16 x float> %__B to <16 x i32> 5870 %and.i.i = and <16 x i32> %1, %neg.i.i 5871 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5872 %3 = bitcast i16 %__U to <16 x i1> 5873 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5874 ret <16 x float> %4 5875 } 5876 5877 define i32 @mov_test1(float %x) { 5878 ; GENERIC-LABEL: mov_test1: 5879 ; GENERIC: # %bb.0: 5880 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5881 ; GENERIC-NEXT: retq # sched: [1:1.00] 5882 ; 5883 ; SKX-LABEL: mov_test1: 5884 ; SKX: # %bb.0: 5885 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5886 ; SKX-NEXT: retq # sched: [7:1.00] 5887 %res = bitcast float %x to i32 5888 ret i32 %res 5889 } 5890 5891 define <4 x i32> @mov_test2(i32 %x) { 5892 ; GENERIC-LABEL: mov_test2: 5893 ; GENERIC: # %bb.0: 5894 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 5895 ; GENERIC-NEXT: retq # sched: [1:1.00] 5896 ; 5897 ; SKX-LABEL: mov_test2: 5898 ; SKX: # %bb.0: 5899 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 5900 ; SKX-NEXT: retq # sched: [7:1.00] 
5901 %res = insertelement <4 x i32>undef, i32 %x, i32 0 5902 ret <4 x i32>%res 5903 } 5904 5905 define <2 x i64> @mov_test3(i64 %x) { 5906 ; GENERIC-LABEL: mov_test3: 5907 ; GENERIC: # %bb.0: 5908 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 5909 ; GENERIC-NEXT: retq # sched: [1:1.00] 5910 ; 5911 ; SKX-LABEL: mov_test3: 5912 ; SKX: # %bb.0: 5913 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 5914 ; SKX-NEXT: retq # sched: [7:1.00] 5915 %res = insertelement <2 x i64>undef, i64 %x, i32 0 5916 ret <2 x i64>%res 5917 } 5918 5919 define <4 x i32> @mov_test4(i32* %x) { 5920 ; GENERIC-LABEL: mov_test4: 5921 ; GENERIC: # %bb.0: 5922 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 5923 ; GENERIC-NEXT: retq # sched: [1:1.00] 5924 ; 5925 ; SKX-LABEL: mov_test4: 5926 ; SKX: # %bb.0: 5927 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 5928 ; SKX-NEXT: retq # sched: [7:1.00] 5929 %y = load i32, i32* %x 5930 %res = insertelement <4 x i32>undef, i32 %y, i32 0 5931 ret <4 x i32>%res 5932 } 5933 5934 define void @mov_test5(float %x, float* %y) { 5935 ; GENERIC-LABEL: mov_test5: 5936 ; GENERIC: # %bb.0: 5937 ; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] 5938 ; GENERIC-NEXT: retq # sched: [1:1.00] 5939 ; 5940 ; SKX-LABEL: mov_test5: 5941 ; SKX: # %bb.0: 5942 ; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] 5943 ; SKX-NEXT: retq # sched: [7:1.00] 5944 store float %x, float* %y, align 4 5945 ret void 5946 } 5947 5948 define void @mov_test6(double %x, double* %y) { 5949 ; GENERIC-LABEL: mov_test6: 5950 ; GENERIC: # %bb.0: 5951 ; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] 5952 ; GENERIC-NEXT: retq # sched: [1:1.00] 5953 ; 5954 ; SKX-LABEL: mov_test6: 5955 ; SKX: # %bb.0: 5956 ; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] 5957 ; SKX-NEXT: retq # sched: [7:1.00] 5958 store double %x, double* %y, align 8 5959 ret void 5960 } 5961 5962 define float @mov_test7(i32* %x) { 5963 ; GENERIC-LABEL: mov_test7: 
5964 ; GENERIC: # %bb.0: 5965 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 5966 ; GENERIC-NEXT: retq # sched: [1:1.00] 5967 ; 5968 ; SKX-LABEL: mov_test7: 5969 ; SKX: # %bb.0: 5970 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 5971 ; SKX-NEXT: retq # sched: [7:1.00] 5972 %y = load i32, i32* %x 5973 %res = bitcast i32 %y to float 5974 ret float %res 5975 } 5976 5977 define i32 @mov_test8(<4 x i32> %x) { 5978 ; GENERIC-LABEL: mov_test8: 5979 ; GENERIC: # %bb.0: 5980 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5981 ; GENERIC-NEXT: retq # sched: [1:1.00] 5982 ; 5983 ; SKX-LABEL: mov_test8: 5984 ; SKX: # %bb.0: 5985 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5986 ; SKX-NEXT: retq # sched: [7:1.00] 5987 %res = extractelement <4 x i32> %x, i32 0 5988 ret i32 %res 5989 } 5990 5991 define i64 @mov_test9(<2 x i64> %x) { 5992 ; GENERIC-LABEL: mov_test9: 5993 ; GENERIC: # %bb.0: 5994 ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 5995 ; GENERIC-NEXT: retq # sched: [1:1.00] 5996 ; 5997 ; SKX-LABEL: mov_test9: 5998 ; SKX: # %bb.0: 5999 ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 6000 ; SKX-NEXT: retq # sched: [7:1.00] 6001 %res = extractelement <2 x i64> %x, i32 0 6002 ret i64 %res 6003 } 6004 6005 define <4 x i32> @mov_test10(i32* %x) { 6006 ; GENERIC-LABEL: mov_test10: 6007 ; GENERIC: # %bb.0: 6008 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 6009 ; GENERIC-NEXT: retq # sched: [1:1.00] 6010 ; 6011 ; SKX-LABEL: mov_test10: 6012 ; SKX: # %bb.0: 6013 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6014 ; SKX-NEXT: retq # sched: [7:1.00] 6015 %y = load i32, i32* %x, align 4 6016 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 6017 ret <4 x i32>%res 6018 } 6019 6020 define <4 x float> @mov_test11(float* %x) { 6021 ; GENERIC-LABEL: mov_test11: 6022 ; GENERIC: # %bb.0: 6023 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 
sched: [6:0.50] 6024 ; GENERIC-NEXT: retq # sched: [1:1.00] 6025 ; 6026 ; SKX-LABEL: mov_test11: 6027 ; SKX: # %bb.0: 6028 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6029 ; SKX-NEXT: retq # sched: [7:1.00] 6030 %y = load float, float* %x, align 4 6031 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 6032 ret <4 x float>%res 6033 } 6034 6035 define <2 x double> @mov_test12(double* %x) { 6036 ; GENERIC-LABEL: mov_test12: 6037 ; GENERIC: # %bb.0: 6038 ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 6039 ; GENERIC-NEXT: retq # sched: [1:1.00] 6040 ; 6041 ; SKX-LABEL: mov_test12: 6042 ; SKX: # %bb.0: 6043 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 6044 ; SKX-NEXT: retq # sched: [7:1.00] 6045 %y = load double, double* %x, align 8 6046 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 6047 ret <2 x double>%res 6048 } 6049 6050 define <2 x i64> @mov_test13(i64 %x) { 6051 ; GENERIC-LABEL: mov_test13: 6052 ; GENERIC: # %bb.0: 6053 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 6054 ; GENERIC-NEXT: retq # sched: [1:1.00] 6055 ; 6056 ; SKX-LABEL: mov_test13: 6057 ; SKX: # %bb.0: 6058 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 6059 ; SKX-NEXT: retq # sched: [7:1.00] 6060 %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 6061 ret <2 x i64>%res 6062 } 6063 6064 define <4 x i32> @mov_test14(i32 %x) { 6065 ; GENERIC-LABEL: mov_test14: 6066 ; GENERIC: # %bb.0: 6067 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 6068 ; GENERIC-NEXT: retq # sched: [1:1.00] 6069 ; 6070 ; SKX-LABEL: mov_test14: 6071 ; SKX: # %bb.0: 6072 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 6073 ; SKX-NEXT: retq # sched: [7:1.00] 6074 %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 6075 ret <4 x i32>%res 6076 } 6077 6078 define <4 x i32> @mov_test15(i32* %x) { 6079 ; GENERIC-LABEL: mov_test15: 6080 ; GENERIC: # %bb.0: 6081 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero sched: [6:0.50] 6082 ; GENERIC-NEXT: retq # sched: [1:1.00] 6083 ; 6084 ; SKX-LABEL: mov_test15: 6085 ; SKX: # %bb.0: 6086 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6087 ; SKX-NEXT: retq # sched: [7:1.00] 6088 %y = load i32, i32* %x, align 4 6089 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 6090 ret <4 x i32>%res 6091 } 6092 6093 define <16 x i32> @mov_test16(i8 * %addr) { 6094 ; GENERIC-LABEL: mov_test16: 6095 ; GENERIC: # %bb.0: 6096 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6097 ; GENERIC-NEXT: retq # sched: [1:1.00] 6098 ; 6099 ; SKX-LABEL: mov_test16: 6100 ; SKX: # %bb.0: 6101 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6102 ; SKX-NEXT: retq # sched: [7:1.00] 6103 %vaddr = bitcast i8* %addr to <16 x i32>* 6104 %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 6105 ret <16 x i32>%res 6106 } 6107 6108 define <16 x i32> @mov_test17(i8 * %addr) { 6109 ; GENERIC-LABEL: mov_test17: 6110 ; GENERIC: # %bb.0: 6111 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6112 ; GENERIC-NEXT: retq # sched: [1:1.00] 6113 ; 6114 ; SKX-LABEL: mov_test17: 6115 ; SKX: # %bb.0: 6116 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6117 ; SKX-NEXT: retq # sched: [7:1.00] 6118 %vaddr = bitcast i8* %addr to <16 x i32>* 6119 %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 6120 ret <16 x i32>%res 6121 } 6122 6123 define void @mov_test18(i8 * %addr, <8 x i64> %data) { 6124 ; GENERIC-LABEL: mov_test18: 6125 ; GENERIC: # %bb.0: 6126 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6127 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6128 ; GENERIC-NEXT: retq # sched: [1:1.00] 6129 ; 6130 ; SKX-LABEL: mov_test18: 6131 ; SKX: # %bb.0: 6132 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6133 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6134 ; SKX-NEXT: retq # sched: [7:1.00] 6135 %vaddr = bitcast i8* %addr to <8 x i64>* 6136 store <8 x i64>%data, <8 x i64>* %vaddr, align 64 6137 ret 
void 6138 } 6139 6140 define void @mov_test19(i8 * %addr, <16 x i32> %data) { 6141 ; GENERIC-LABEL: mov_test19: 6142 ; GENERIC: # %bb.0: 6143 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6144 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6145 ; GENERIC-NEXT: retq # sched: [1:1.00] 6146 ; 6147 ; SKX-LABEL: mov_test19: 6148 ; SKX: # %bb.0: 6149 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6150 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6151 ; SKX-NEXT: retq # sched: [7:1.00] 6152 %vaddr = bitcast i8* %addr to <16 x i32>* 6153 store <16 x i32>%data, <16 x i32>* %vaddr, align 1 6154 ret void 6155 } 6156 6157 define void @mov_test20(i8 * %addr, <16 x i32> %data) { 6158 ; GENERIC-LABEL: mov_test20: 6159 ; GENERIC: # %bb.0: 6160 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6161 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6162 ; GENERIC-NEXT: retq # sched: [1:1.00] 6163 ; 6164 ; SKX-LABEL: mov_test20: 6165 ; SKX: # %bb.0: 6166 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6167 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6168 ; SKX-NEXT: retq # sched: [7:1.00] 6169 %vaddr = bitcast i8* %addr to <16 x i32>* 6170 store <16 x i32>%data, <16 x i32>* %vaddr, align 64 6171 ret void 6172 } 6173 6174 define <8 x i64> @mov_test21(i8 * %addr) { 6175 ; GENERIC-LABEL: mov_test21: 6176 ; GENERIC: # %bb.0: 6177 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6178 ; GENERIC-NEXT: retq # sched: [1:1.00] 6179 ; 6180 ; SKX-LABEL: mov_test21: 6181 ; SKX: # %bb.0: 6182 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6183 ; SKX-NEXT: retq # sched: [7:1.00] 6184 %vaddr = bitcast i8* %addr to <8 x i64>* 6185 %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 6186 ret <8 x i64>%res 6187 } 6188 6189 define void @mov_test22(i8 * %addr, <8 x i64> %data) { 6190 ; GENERIC-LABEL: mov_test22: 6191 ; GENERIC: # %bb.0: 6192 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6193 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6194 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 6195 ; 6196 ; SKX-LABEL: mov_test22: 6197 ; SKX: # %bb.0: 6198 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6199 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6200 ; SKX-NEXT: retq # sched: [7:1.00] 6201 %vaddr = bitcast i8* %addr to <8 x i64>* 6202 store <8 x i64>%data, <8 x i64>* %vaddr, align 1 6203 ret void 6204 } 6205 6206 define <8 x i64> @mov_test23(i8 * %addr) { 6207 ; GENERIC-LABEL: mov_test23: 6208 ; GENERIC: # %bb.0: 6209 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6210 ; GENERIC-NEXT: retq # sched: [1:1.00] 6211 ; 6212 ; SKX-LABEL: mov_test23: 6213 ; SKX: # %bb.0: 6214 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6215 ; SKX-NEXT: retq # sched: [7:1.00] 6216 %vaddr = bitcast i8* %addr to <8 x i64>* 6217 %res = load <8 x i64>, <8 x i64>* %vaddr, align 1 6218 ret <8 x i64>%res 6219 } 6220 6221 define void @mov_test24(i8 * %addr, <8 x double> %data) { 6222 ; GENERIC-LABEL: mov_test24: 6223 ; GENERIC: # %bb.0: 6224 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6225 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6226 ; GENERIC-NEXT: retq # sched: [1:1.00] 6227 ; 6228 ; SKX-LABEL: mov_test24: 6229 ; SKX: # %bb.0: 6230 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6231 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6232 ; SKX-NEXT: retq # sched: [7:1.00] 6233 %vaddr = bitcast i8* %addr to <8 x double>* 6234 store <8 x double>%data, <8 x double>* %vaddr, align 64 6235 ret void 6236 } 6237 6238 define <8 x double> @mov_test25(i8 * %addr) { 6239 ; GENERIC-LABEL: mov_test25: 6240 ; GENERIC: # %bb.0: 6241 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6242 ; GENERIC-NEXT: retq # sched: [1:1.00] 6243 ; 6244 ; SKX-LABEL: mov_test25: 6245 ; SKX: # %bb.0: 6246 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6247 ; SKX-NEXT: retq # sched: [7:1.00] 6248 %vaddr = bitcast i8* %addr to <8 x double>* 6249 %res = load <8 x double>, <8 x double>* %vaddr, align 64 6250 ret <8 x double>%res 6251 } 6252 6253 define void 
@mov_test26(i8 * %addr, <16 x float> %data) { 6254 ; GENERIC-LABEL: mov_test26: 6255 ; GENERIC: # %bb.0: 6256 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6257 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6258 ; GENERIC-NEXT: retq # sched: [1:1.00] 6259 ; 6260 ; SKX-LABEL: mov_test26: 6261 ; SKX: # %bb.0: 6262 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6263 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6264 ; SKX-NEXT: retq # sched: [7:1.00] 6265 %vaddr = bitcast i8* %addr to <16 x float>* 6266 store <16 x float>%data, <16 x float>* %vaddr, align 64 6267 ret void 6268 } 6269 6270 define <16 x float> @mov_test27(i8 * %addr) { 6271 ; GENERIC-LABEL: mov_test27: 6272 ; GENERIC: # %bb.0: 6273 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6274 ; GENERIC-NEXT: retq # sched: [1:1.00] 6275 ; 6276 ; SKX-LABEL: mov_test27: 6277 ; SKX: # %bb.0: 6278 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6279 ; SKX-NEXT: retq # sched: [7:1.00] 6280 %vaddr = bitcast i8* %addr to <16 x float>* 6281 %res = load <16 x float>, <16 x float>* %vaddr, align 64 6282 ret <16 x float>%res 6283 } 6284 6285 define void @mov_test28(i8 * %addr, <8 x double> %data) { 6286 ; GENERIC-LABEL: mov_test28: 6287 ; GENERIC: # %bb.0: 6288 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6289 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6290 ; GENERIC-NEXT: retq # sched: [1:1.00] 6291 ; 6292 ; SKX-LABEL: mov_test28: 6293 ; SKX: # %bb.0: 6294 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6295 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6296 ; SKX-NEXT: retq # sched: [7:1.00] 6297 %vaddr = bitcast i8* %addr to <8 x double>* 6298 store <8 x double>%data, <8 x double>* %vaddr, align 1 6299 ret void 6300 } 6301 6302 define <8 x double> @mov_test29(i8 * %addr) { 6303 ; GENERIC-LABEL: mov_test29: 6304 ; GENERIC: # %bb.0: 6305 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6306 ; GENERIC-NEXT: retq # sched: [1:1.00] 6307 ; 6308 ; SKX-LABEL: mov_test29: 6309 ; SKX: # 
%bb.0: 6310 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6311 ; SKX-NEXT: retq # sched: [7:1.00] 6312 %vaddr = bitcast i8* %addr to <8 x double>* 6313 %res = load <8 x double>, <8 x double>* %vaddr, align 1 6314 ret <8 x double>%res 6315 } 6316 6317 define void @mov_test30(i8 * %addr, <16 x float> %data) { 6318 ; GENERIC-LABEL: mov_test30: 6319 ; GENERIC: # %bb.0: 6320 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6321 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6322 ; GENERIC-NEXT: retq # sched: [1:1.00] 6323 ; 6324 ; SKX-LABEL: mov_test30: 6325 ; SKX: # %bb.0: 6326 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6327 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6328 ; SKX-NEXT: retq # sched: [7:1.00] 6329 %vaddr = bitcast i8* %addr to <16 x float>* 6330 store <16 x float>%data, <16 x float>* %vaddr, align 1 6331 ret void 6332 } 6333 6334 define <16 x float> @mov_test31(i8 * %addr) { 6335 ; GENERIC-LABEL: mov_test31: 6336 ; GENERIC: # %bb.0: 6337 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6338 ; GENERIC-NEXT: retq # sched: [1:1.00] 6339 ; 6340 ; SKX-LABEL: mov_test31: 6341 ; SKX: # %bb.0: 6342 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6343 ; SKX-NEXT: retq # sched: [7:1.00] 6344 %vaddr = bitcast i8* %addr to <16 x float>* 6345 %res = load <16 x float>, <16 x float>* %vaddr, align 1 6346 ret <16 x float>%res 6347 } 6348 6349 define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { 6350 ; GENERIC-LABEL: mov_test32: 6351 ; GENERIC: # %bb.0: 6352 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 6353 ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6354 ; GENERIC-NEXT: retq # sched: [1:1.00] 6355 ; 6356 ; SKX-LABEL: mov_test32: 6357 ; SKX: # %bb.0: 6358 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 6359 ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6360 ; SKX-NEXT: retq # sched: [7:1.00] 6361 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6362 
%vaddr = bitcast i8* %addr to <16 x i32>* 6363 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 6364 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old 6365 ret <16 x i32>%res 6366 } 6367 6368 define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { 6369 ; GENERIC-LABEL: mov_test33: 6370 ; GENERIC: # %bb.0: 6371 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 6372 ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6373 ; GENERIC-NEXT: retq # sched: [1:1.00] 6374 ; 6375 ; SKX-LABEL: mov_test33: 6376 ; SKX: # %bb.0: 6377 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 6378 ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6379 ; SKX-NEXT: retq # sched: [7:1.00] 6380 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6381 %vaddr = bitcast i8* %addr to <16 x i32>* 6382 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 6383 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old 6384 ret <16 x i32>%res 6385 } 6386 6387 define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { 6388 ; GENERIC-LABEL: mov_test34: 6389 ; GENERIC: # %bb.0: 6390 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 6391 ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6392 ; GENERIC-NEXT: retq # sched: [1:1.00] 6393 ; 6394 ; SKX-LABEL: mov_test34: 6395 ; SKX: # %bb.0: 6396 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 6397 ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6398 ; SKX-NEXT: retq # sched: [7:1.00] 6399 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6400 %vaddr = bitcast i8* %addr to <16 x i32>* 6401 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 6402 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer 6403 ret <16 x i32>%res 6404 } 6405 6406 define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { 6407 ; GENERIC-LABEL: mov_test35: 6408 ; GENERIC: # %bb.0: 6409 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # 
sched: [1:0.33] 6410 ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6411 ; GENERIC-NEXT: retq # sched: [1:1.00] 6412 ; 6413 ; SKX-LABEL: mov_test35: 6414 ; SKX: # %bb.0: 6415 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 6416 ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6417 ; SKX-NEXT: retq # sched: [7:1.00] 6418 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6419 %vaddr = bitcast i8* %addr to <16 x i32>* 6420 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 6421 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer 6422 ret <16 x i32>%res 6423 } 6424 6425 define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { 6426 ; GENERIC-LABEL: mov_test36: 6427 ; GENERIC: # %bb.0: 6428 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 6429 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6430 ; GENERIC-NEXT: retq # sched: [1:1.00] 6431 ; 6432 ; SKX-LABEL: mov_test36: 6433 ; SKX: # %bb.0: 6434 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 6435 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6436 ; SKX-NEXT: retq # sched: [7:1.00] 6437 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6438 %vaddr = bitcast i8* %addr to <8 x i64>* 6439 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 6440 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old 6441 ret <8 x i64>%res 6442 } 6443 6444 define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { 6445 ; GENERIC-LABEL: mov_test37: 6446 ; GENERIC: # %bb.0: 6447 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 6448 ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6449 ; GENERIC-NEXT: retq # sched: [1:1.00] 6450 ; 6451 ; SKX-LABEL: mov_test37: 6452 ; SKX: # %bb.0: 6453 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 6454 ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6455 ; SKX-NEXT: retq # sched: [7:1.00] 6456 %mask = icmp ne 
<8 x i64> %mask1, zeroinitializer 6457 %vaddr = bitcast i8* %addr to <8 x i64>* 6458 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 6459 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old 6460 ret <8 x i64>%res 6461 } 6462 6463 define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { 6464 ; GENERIC-LABEL: mov_test38: 6465 ; GENERIC: # %bb.0: 6466 ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 6467 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6468 ; GENERIC-NEXT: retq # sched: [1:1.00] 6469 ; 6470 ; SKX-LABEL: mov_test38: 6471 ; SKX: # %bb.0: 6472 ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 6473 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6474 ; SKX-NEXT: retq # sched: [7:1.00] 6475 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6476 %vaddr = bitcast i8* %addr to <8 x i64>* 6477 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 6478 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer 6479 ret <8 x i64>%res 6480 } 6481 6482 define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { 6483 ; GENERIC-LABEL: mov_test39: 6484 ; GENERIC: # %bb.0: 6485 ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 6486 ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6487 ; GENERIC-NEXT: retq # sched: [1:1.00] 6488 ; 6489 ; SKX-LABEL: mov_test39: 6490 ; SKX: # %bb.0: 6491 ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 6492 ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6493 ; SKX-NEXT: retq # sched: [7:1.00] 6494 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6495 %vaddr = bitcast i8* %addr to <8 x i64>* 6496 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 6497 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer 6498 ret <8 x i64>%res 6499 } 6500 6501 define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { 6502 ; GENERIC-LABEL: mov_test40: 6503 ; GENERIC: # %bb.0: 6504 ; 
GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6505 ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6506 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6507 ; GENERIC-NEXT: retq # sched: [1:1.00] 6508 ; 6509 ; SKX-LABEL: mov_test40: 6510 ; SKX: # %bb.0: 6511 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6512 ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6513 ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6514 ; SKX-NEXT: retq # sched: [7:1.00] 6515 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6516 %vaddr = bitcast i8* %addr to <16 x float>* 6517 %r = load <16 x float>, <16 x float>* %vaddr, align 64 6518 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old 6519 ret <16 x float>%res 6520 } 6521 6522 define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { 6523 ; GENERIC-LABEL: mov_test41: 6524 ; GENERIC: # %bb.0: 6525 ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6526 ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6527 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6528 ; GENERIC-NEXT: retq # sched: [1:1.00] 6529 ; 6530 ; SKX-LABEL: mov_test41: 6531 ; SKX: # %bb.0: 6532 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6533 ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6534 ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6535 ; SKX-NEXT: retq # sched: [7:1.00] 6536 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6537 %vaddr = bitcast i8* %addr to <16 x float>* 6538 %r = load <16 x float>, <16 x float>* %vaddr, align 1 6539 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old 6540 ret <16 x float>%res 6541 } 6542 6543 define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { 6544 ; GENERIC-LABEL: mov_test42: 6545 ; GENERIC: # %bb.0: 6546 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6547 ; GENERIC-NEXT: vcmpneq_oqps 
%zmm1, %zmm0, %k1 # sched: [3:1.00] 6548 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6549 ; GENERIC-NEXT: retq # sched: [1:1.00] 6550 ; 6551 ; SKX-LABEL: mov_test42: 6552 ; SKX: # %bb.0: 6553 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6554 ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6555 ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6556 ; SKX-NEXT: retq # sched: [7:1.00] 6557 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6558 %vaddr = bitcast i8* %addr to <16 x float>* 6559 %r = load <16 x float>, <16 x float>* %vaddr, align 64 6560 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer 6561 ret <16 x float>%res 6562 } 6563 6564 define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { 6565 ; GENERIC-LABEL: mov_test43: 6566 ; GENERIC: # %bb.0: 6567 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6568 ; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6569 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6570 ; GENERIC-NEXT: retq # sched: [1:1.00] 6571 ; 6572 ; SKX-LABEL: mov_test43: 6573 ; SKX: # %bb.0: 6574 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6575 ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6576 ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6577 ; SKX-NEXT: retq # sched: [7:1.00] 6578 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6579 %vaddr = bitcast i8* %addr to <16 x float>* 6580 %r = load <16 x float>, <16 x float>* %vaddr, align 1 6581 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer 6582 ret <16 x float>%res 6583 } 6584 6585 define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { 6586 ; GENERIC-LABEL: mov_test44: 6587 ; GENERIC: # %bb.0: 6588 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6589 ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6590 ; 
GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6591 ; GENERIC-NEXT: retq # sched: [1:1.00] 6592 ; 6593 ; SKX-LABEL: mov_test44: 6594 ; SKX: # %bb.0: 6595 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6596 ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6597 ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6598 ; SKX-NEXT: retq # sched: [7:1.00] 6599 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6600 %vaddr = bitcast i8* %addr to <8 x double>* 6601 %r = load <8 x double>, <8 x double>* %vaddr, align 64 6602 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old 6603 ret <8 x double>%res 6604 } 6605 6606 define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { 6607 ; GENERIC-LABEL: mov_test45: 6608 ; GENERIC: # %bb.0: 6609 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6610 ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6611 ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6612 ; GENERIC-NEXT: retq # sched: [1:1.00] 6613 ; 6614 ; SKX-LABEL: mov_test45: 6615 ; SKX: # %bb.0: 6616 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6617 ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6618 ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6619 ; SKX-NEXT: retq # sched: [7:1.00] 6620 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6621 %vaddr = bitcast i8* %addr to <8 x double>* 6622 %r = load <8 x double>, <8 x double>* %vaddr, align 1 6623 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old 6624 ret <8 x double>%res 6625 } 6626 6627 define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { 6628 ; GENERIC-LABEL: mov_test46: 6629 ; GENERIC: # %bb.0: 6630 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6631 ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6632 ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6633 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 6634 ; 6635 ; SKX-LABEL: mov_test46: 6636 ; SKX: # %bb.0: 6637 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6638 ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6639 ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6640 ; SKX-NEXT: retq # sched: [7:1.00] 6641 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6642 %vaddr = bitcast i8* %addr to <8 x double>* 6643 %r = load <8 x double>, <8 x double>* %vaddr, align 64 6644 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer 6645 ret <8 x double>%res 6646 } 6647 6648 define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { 6649 ; GENERIC-LABEL: mov_test47: 6650 ; GENERIC: # %bb.0: 6651 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6652 ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6653 ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6654 ; GENERIC-NEXT: retq # sched: [1:1.00] 6655 ; 6656 ; SKX-LABEL: mov_test47: 6657 ; SKX: # %bb.0: 6658 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6659 ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6660 ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6661 ; SKX-NEXT: retq # sched: [7:1.00] 6662 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6663 %vaddr = bitcast i8* %addr to <8 x double>* 6664 %r = load <8 x double>, <8 x double>* %vaddr, align 1 6665 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer 6666 ret <8 x double>%res 6667 } 6668 6669 define i16 @mask16(i16 %x) { 6670 ; GENERIC-LABEL: mask16: 6671 ; GENERIC: # %bb.0: 6672 ; GENERIC-NEXT: notl %edi # sched: [1:0.33] 6673 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6674 ; GENERIC-NEXT: retq # sched: [1:1.00] 6675 ; 6676 ; SKX-LABEL: mask16: 6677 ; SKX: # %bb.0: 6678 ; SKX-NEXT: notl %edi # sched: [1:0.25] 6679 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6680 ; SKX-NEXT: retq # sched: [7:1.00] 6681 %m0 = 
bitcast i16 %x to <16 x i1> 6682 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6683 %ret = bitcast <16 x i1> %m1 to i16 6684 ret i16 %ret 6685 } 6686 6687 define i32 @mask16_zext(i16 %x) { 6688 ; GENERIC-LABEL: mask16_zext: 6689 ; GENERIC: # %bb.0: 6690 ; GENERIC-NEXT: notl %edi # sched: [1:0.33] 6691 ; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33] 6692 ; GENERIC-NEXT: retq # sched: [1:1.00] 6693 ; 6694 ; SKX-LABEL: mask16_zext: 6695 ; SKX: # %bb.0: 6696 ; SKX-NEXT: notl %edi # sched: [1:0.25] 6697 ; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25] 6698 ; SKX-NEXT: retq # sched: [7:1.00] 6699 %m0 = bitcast i16 %x to <16 x i1> 6700 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6701 %m2 = bitcast <16 x i1> %m1 to i16 6702 %ret = zext i16 %m2 to i32 6703 ret i32 %ret 6704 } 6705 6706 define i8 @mask8(i8 %x) { 6707 ; GENERIC-LABEL: mask8: 6708 ; GENERIC: # %bb.0: 6709 ; GENERIC-NEXT: notb %dil # sched: [1:0.33] 6710 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6711 ; GENERIC-NEXT: retq # sched: [1:1.00] 6712 ; 6713 ; SKX-LABEL: mask8: 6714 ; SKX: # %bb.0: 6715 ; SKX-NEXT: notb %dil # sched: [1:0.25] 6716 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6717 ; SKX-NEXT: retq # sched: [7:1.00] 6718 %m0 = bitcast i8 %x to <8 x i1> 6719 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6720 %ret = bitcast <8 x i1> %m1 to i8 6721 ret i8 %ret 6722 } 6723 6724 define i32 @mask8_zext(i8 %x) { 6725 ; GENERIC-LABEL: mask8_zext: 6726 ; GENERIC: # %bb.0: 6727 ; GENERIC-NEXT: notb %dil # sched: [1:0.33] 6728 ; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33] 6729 ; GENERIC-NEXT: retq # sched: [1:1.00] 6730 ; 6731 ; SKX-LABEL: mask8_zext: 6732 ; SKX: # %bb.0: 6733 ; SKX-NEXT: notb %dil # sched: [1:0.25] 6734 ; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25] 6735 ; 
SKX-NEXT: retq # sched: [7:1.00] 6736 %m0 = bitcast i8 %x to <8 x i1> 6737 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6738 %m2 = bitcast <8 x i1> %m1 to i8 6739 %ret = zext i8 %m2 to i32 6740 ret i32 %ret 6741 } 6742 6743 define void @mask16_mem(i16* %ptr) { 6744 ; GENERIC-LABEL: mask16_mem: 6745 ; GENERIC: # %bb.0: 6746 ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 6747 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 6748 ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 6749 ; GENERIC-NEXT: retq # sched: [1:1.00] 6750 ; 6751 ; SKX-LABEL: mask16_mem: 6752 ; SKX: # %bb.0: 6753 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 6754 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 6755 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 6756 ; SKX-NEXT: retq # sched: [7:1.00] 6757 %x = load i16, i16* %ptr, align 4 6758 %m0 = bitcast i16 %x to <16 x i1> 6759 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6760 %ret = bitcast <16 x i1> %m1 to i16 6761 store i16 %ret, i16* %ptr, align 4 6762 ret void 6763 } 6764 6765 define void @mask8_mem(i8* %ptr) { 6766 ; GENERIC-LABEL: mask8_mem: 6767 ; GENERIC: # %bb.0: 6768 ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 6769 ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] 6770 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 6771 ; GENERIC-NEXT: retq # sched: [1:1.00] 6772 ; 6773 ; SKX-LABEL: mask8_mem: 6774 ; SKX: # %bb.0: 6775 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 6776 ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] 6777 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 6778 ; SKX-NEXT: retq # sched: [7:1.00] 6779 %x = load i8, i8* %ptr, align 4 6780 %m0 = bitcast i8 %x to <8 x i1> 6781 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6782 %ret = bitcast <8 x i1> %m1 to i8 6783 store i8 %ret, i8* %ptr, align 4 6784 ret void 6785 } 6786 6787 define i16 
@mand16(i16 %x, i16 %y) { 6788 ; GENERIC-LABEL: mand16: 6789 ; GENERIC: # %bb.0: 6790 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6791 ; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] 6792 ; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] 6793 ; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] 6794 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6795 ; GENERIC-NEXT: retq # sched: [1:1.00] 6796 ; 6797 ; SKX-LABEL: mand16: 6798 ; SKX: # %bb.0: 6799 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6800 ; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] 6801 ; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] 6802 ; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] 6803 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6804 ; SKX-NEXT: retq # sched: [7:1.00] 6805 %ma = bitcast i16 %x to <16 x i1> 6806 %mb = bitcast i16 %y to <16 x i1> 6807 %mc = and <16 x i1> %ma, %mb 6808 %md = xor <16 x i1> %ma, %mb 6809 %me = or <16 x i1> %mc, %md 6810 %ret = bitcast <16 x i1> %me to i16 6811 ret i16 %ret 6812 } 6813 6814 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { 6815 ; GENERIC-LABEL: mand16_mem: 6816 ; GENERIC: # %bb.0: 6817 ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 6818 ; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] 6819 ; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33] 6820 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 6821 ; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33] 6822 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6823 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 6824 ; GENERIC-NEXT: retq # sched: [1:1.00] 6825 ; 6826 ; SKX-LABEL: mand16_mem: 6827 ; SKX: # %bb.0: 6828 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 6829 ; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00] 6830 ; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] 6831 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 6832 ; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] 6833 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6834 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 
6835 ; SKX-NEXT: retq # sched: [7:1.00] 6836 %ma = load <16 x i1>, <16 x i1>* %x 6837 %mb = load <16 x i1>, <16 x i1>* %y 6838 %mc = and <16 x i1> %ma, %mb 6839 %md = xor <16 x i1> %ma, %mb 6840 %me = or <16 x i1> %mc, %md 6841 %ret = bitcast <16 x i1> %me to i16 6842 ret i16 %ret 6843 } 6844 6845 define i8 @shuf_test1(i16 %v) nounwind { 6846 ; GENERIC-LABEL: shuf_test1: 6847 ; GENERIC: # %bb.0: 6848 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 6849 ; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00] 6850 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6851 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax 6852 ; GENERIC-NEXT: retq # sched: [1:1.00] 6853 ; 6854 ; SKX-LABEL: shuf_test1: 6855 ; SKX: # %bb.0: 6856 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 6857 ; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00] 6858 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6859 ; SKX-NEXT: # kill: def $al killed $al killed $eax 6860 ; SKX-NEXT: retq # sched: [7:1.00] 6861 %v1 = bitcast i16 %v to <16 x i1> 6862 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6863 %mask1 = bitcast <8 x i1> %mask to i8 6864 ret i8 %mask1 6865 } 6866 6867 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { 6868 ; GENERIC-LABEL: zext_test1: 6869 ; GENERIC: # %bb.0: 6870 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6871 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6872 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6873 ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] 6874 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6875 ; GENERIC-NEXT: retq # sched: [1:1.00] 6876 ; 6877 ; SKX-LABEL: zext_test1: 6878 ; SKX: # %bb.0: 6879 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6880 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] 6881 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6882 ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] 6883 ; SKX-NEXT: vzeroupper # sched: 
[4:1.00] 6884 ; SKX-NEXT: retq # sched: [7:1.00] 6885 %cmp_res = icmp ugt <16 x i32> %a, %b 6886 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6887 %res = zext i1 %cmp_res.i1 to i32 6888 ret i32 %res 6889 } 6890 6891 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { 6892 ; GENERIC-LABEL: zext_test2: 6893 ; GENERIC: # %bb.0: 6894 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6895 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6896 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6897 ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] 6898 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 6899 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6900 ; GENERIC-NEXT: retq # sched: [1:1.00] 6901 ; 6902 ; SKX-LABEL: zext_test2: 6903 ; SKX: # %bb.0: 6904 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6905 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] 6906 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6907 ; SKX-NEXT: andl $1, %eax # sched: [1:0.25] 6908 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 6909 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6910 ; SKX-NEXT: retq # sched: [7:1.00] 6911 %cmp_res = icmp ugt <16 x i32> %a, %b 6912 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6913 %res = zext i1 %cmp_res.i1 to i16 6914 ret i16 %res 6915 } 6916 6917 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { 6918 ; GENERIC-LABEL: zext_test3: 6919 ; GENERIC: # %bb.0: 6920 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6921 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6922 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6923 ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] 6924 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax 6925 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6926 ; GENERIC-NEXT: retq # sched: [1:1.00] 6927 ; 6928 ; SKX-LABEL: zext_test3: 6929 ; SKX: # %bb.0: 6930 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6931 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # 
sched: [3:1.00] 6932 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6933 ; SKX-NEXT: andb $1, %al # sched: [1:0.25] 6934 ; SKX-NEXT: # kill: def $al killed $al killed $eax 6935 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6936 ; SKX-NEXT: retq # sched: [7:1.00] 6937 %cmp_res = icmp ugt <16 x i32> %a, %b 6938 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6939 %res = zext i1 %cmp_res.i1 to i8 6940 ret i8 %res 6941 } 6942 6943 define i8 @conv1(<8 x i1>* %R) { 6944 ; GENERIC-LABEL: conv1: 6945 ; GENERIC: # %bb.0: # %entry 6946 ; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00] 6947 ; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 6948 ; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] 6949 ; GENERIC-NEXT: retq # sched: [1:1.00] 6950 ; 6951 ; SKX-LABEL: conv1: 6952 ; SKX: # %bb.0: # %entry 6953 ; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00] 6954 ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 6955 ; SKX-NEXT: movb $-2, %al # sched: [1:0.25] 6956 ; SKX-NEXT: retq # sched: [7:1.00] 6957 entry: 6958 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R 6959 6960 %maskPtr = alloca <8 x i1> 6961 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr 6962 %mask = load <8 x i1>, <8 x i1>* %maskPtr 6963 %mask_convert = bitcast <8 x i1> %mask to i8 6964 ret i8 %mask_convert 6965 } 6966 6967 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { 6968 ; GENERIC-LABEL: test4: 6969 ; GENERIC: # %bb.0: 6970 ; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50] 6971 ; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50] 6972 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 6973 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6974 ; GENERIC-NEXT: retq # sched: [1:1.00] 6975 ; 6976 ; SKX-LABEL: test4: 6977 ; SKX: # %bb.0: 6978 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00] 6979 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00] 6980 ; 
SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 6981 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 6982 ; SKX-NEXT: retq # sched: [7:1.00] 6983 %x_gt_y = icmp sgt <4 x i64> %x, %y 6984 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 6985 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1 6986 %resse = sext <4 x i1>%res to <4 x i32> 6987 ret <4 x i32> %resse 6988 } 6989
; vcmp_test5: lane-wise compares on <2 x i64> (%x slt %y, %x1 sgt %y1), then
; compares the two resulting <2 x i1> masks with icmp slt and sign-extends the
; result to <2 x i64>. Note that the value named %x_gt_y holds an slt compare
; in this function.
6990 define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { 6991 ; GENERIC-LABEL: vcmp_test5: 6992 ; GENERIC: # %bb.0: 6993 ; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50] 6994 ; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50] 6995 ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] 6996 ; GENERIC-NEXT: retq # sched: [1:1.00] 6997 ; 6998 ; SKX-LABEL: vcmp_test5: 6999 ; SKX: # %bb.0: 7000 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00] 7001 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00] 7002 ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] 7003 ; SKX-NEXT: retq # sched: [7:1.00] 7004 %x_gt_y = icmp slt <2 x i64> %x, %y 7005 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 7006 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 7007 %resse = sext <2 x i1>%res to <2 x i64> 7008 ret <2 x i64> %resse 7009 }
; vcmp_test6: ANDs the <16 x i1> %mask with an alternating true/false constant,
; bitcasts the result to i16 and branches on whether it equals zero; both
; successors (%true and %false) simply return void.
; NOTE(review): unlike the neighbouring functions, no GENERIC/SKX assertions
; are present for this function -- presumably they need to be regenerated with
; utils/update_llc_test_checks.py; confirm before relying on this test.
define void @vcmp_test6(<16 x i1> %mask) { 7010 allocas: 7011 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 7012 %b = bitcast <16 x i1> %a to i16 7013 %c = icmp eq i16 %b, 0 7014 br i1 %c, label %true, label %false 7015 7016 true: 7017 ret void 7018 7019 false: 7020 ret void 7021 } 7022 define void @vcmp_test7(<8 x i1> %mask) { 7023 ; GENERIC-LABEL: vcmp_test7: 7024 ; GENERIC: # %bb.0: # %allocas 7025 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7026 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7027 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 7028 ; GENERIC-NEXT: orb $85, %al # sched: 
[1:0.33] 7029 ; GENERIC-NEXT: retq # sched: [1:1.00] 7030 ; 7031 ; SKX-LABEL: vcmp_test7: 7032 ; SKX: # %bb.0: # %allocas 7033 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7034 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7035 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 7036 ; SKX-NEXT: orb $85, %al # sched: [1:0.25] 7037 ; SKX-NEXT: retq # sched: [7:1.00] 7038 allocas: 7039 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 7040 %b = bitcast <8 x i1> %a to i8 7041 %c = icmp eq i8 %b, 0 7042 br i1 %c, label %true, label %false 7043 7044 true: 7045 ret void 7046 7047 false: 7048 ret void 7049 } 7050 define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { 7051 ; GENERIC-LABEL: vcmp_test8: 7052 ; GENERIC: # %bb.0: 7053 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7054 ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] 7055 ; GENERIC-NEXT: # %bb.2: 7056 ; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] 7057 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7058 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7059 ; GENERIC-NEXT: retq # sched: [1:1.00] 7060 ; GENERIC-NEXT: .LBB386_1: 7061 ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 7062 ; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] 7063 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7064 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7065 ; GENERIC-NEXT: retq # sched: [1:1.00] 7066 ; 7067 ; SKX-LABEL: vcmp_test8: 7068 ; SKX: # %bb.0: 7069 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7070 ; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] 7071 ; SKX-NEXT: # %bb.2: 7072 ; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] 7073 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7074 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7075 ; SKX-NEXT: retq # sched: [7:1.00] 7076 ; SKX-NEXT: .LBB386_1: 7077 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 7078 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: 
[3:1.00] 7079 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7080 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7081 ; SKX-NEXT: retq # sched: [7:1.00] 7082 %cond = icmp sgt i32 %a1, %b1 7083 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer 7084 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer 7085 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2 7086 %res = sext <16 x i1> %mix to <16 x i8> 7087 ret <16 x i8> %res 7088 }
; vpmov_test9: returns <16 x i1> %a when the scalar compare %a1 sgt %b1 is
; true, otherwise <16 x i1> %b (a scalar-conditioned select between two mask
; vectors).
7089 define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { 7090 ; GENERIC-LABEL: vpmov_test9: 7091 ; GENERIC: # %bb.0: 7092 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7093 ; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00] 7094 ; GENERIC-NEXT: # %bb.2: 7095 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] 7096 ; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00] 7097 ; GENERIC-NEXT: .LBB387_1: 7098 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7099 ; GENERIC-NEXT: .LBB387_3: 7100 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7101 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7102 ; GENERIC-NEXT: retq # sched: [1:1.00] 7103 ; 7104 ; SKX-LABEL: vpmov_test9: 7105 ; SKX: # %bb.0: 7106 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7107 ; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50] 7108 ; SKX-NEXT: # %bb.2: 7109 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] 7110 ; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50] 7111 ; SKX-NEXT: .LBB387_1: 7112 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7113 ; SKX-NEXT: .LBB387_3: 7114 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7115 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7116 ; SKX-NEXT: retq # sched: [7:1.00] 7117 %mask = icmp sgt i32 %a1, %b1 7118 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b 7119 ret <16 x i1>%c 7120 }
; vpmov_test10: same scalar-conditioned select as vpmov_test9, but on
; <8 x i1> operands: returns %a when %a1 sgt %b1, otherwise %b.
; NOTE(review): no GENERIC/SKX assertions are present for this function,
; unlike vpmov_test9 -- presumably they need to be regenerated with
; utils/update_llc_test_checks.py; confirm.
define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { 7121 %mask = icmp sgt i32 %a1, %b1 7122 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b 7123 ret <8 x i1>%c 7124 } 7125 7126 define <4 x i1> 
@vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { 7127 ; GENERIC-LABEL: vmov_test11: 7128 ; GENERIC: # %bb.0: 7129 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7130 ; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00] 7131 ; GENERIC-NEXT: # %bb.2: 7132 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] 7133 ; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00] 7134 ; GENERIC-NEXT: .LBB389_1: 7135 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7136 ; GENERIC-NEXT: .LBB389_3: 7137 ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7138 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 7139 ; GENERIC-NEXT: retq # sched: [1:1.00] 7140 ; 7141 ; SKX-LABEL: vmov_test11: 7142 ; SKX: # %bb.0: 7143 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7144 ; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50] 7145 ; SKX-NEXT: # %bb.2: 7146 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] 7147 ; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50] 7148 ; SKX-NEXT: .LBB389_1: 7149 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7150 ; SKX-NEXT: .LBB389_3: 7151 ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7152 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 7153 ; SKX-NEXT: retq # sched: [7:1.00] 7154 %mask = icmp sgt i32 %a1, %b1 7155 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b 7156 ret <4 x i1>%c 7157 } 7158 7159 define i32 @vmov_test12(i32 %x, i32 %y) { 7160 ; GENERIC-LABEL: vmov_test12: 7161 ; GENERIC: # %bb.0: 7162 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 7163 ; GENERIC-NEXT: retq # sched: [1:1.00] 7164 ; 7165 ; SKX-LABEL: vmov_test12: 7166 ; SKX: # %bb.0: 7167 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 7168 ; SKX-NEXT: retq # sched: [7:1.00] 7169 %a = bitcast i16 21845 to <16 x i1> 7170 %b = extractelement <16 x i1> %a, i32 0 7171 %c = select i1 %b, i32 %x, i32 %y 7172 ret i32 %c 7173 } 7174 7175 define i32 @vmov_test13(i32 %x, i32 %y) { 7176 ; GENERIC-LABEL: vmov_test13: 7177 ; GENERIC: # %bb.0: 7178 ; GENERIC-NEXT: movl %esi, %eax # sched: 
[1:0.33] 7179 ; GENERIC-NEXT: retq # sched: [1:1.00] 7180 ; 7181 ; SKX-LABEL: vmov_test13: 7182 ; SKX: # %bb.0: 7183 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] 7184 ; SKX-NEXT: retq # sched: [7:1.00] 7185 %a = bitcast i16 21845 to <16 x i1> 7186 %b = extractelement <16 x i1> %a, i32 3 7187 %c = select i1 %b, i32 %x, i32 %y 7188 ret i32 %c 7189 }
; vmov_test14: takes no arguments. Bitcasts the constant i16 21845 to
; <16 x i1>, extracts element 2 and inserts it at index 1 of the constant
; <4 x i1> <true, false, false, true>, returning the resulting vector.
; NOTE(review): no GENERIC/SKX assertions are present for this function,
; unlike the neighbouring vmov_test13/vmov_test15 -- presumably they need to
; be regenerated with utils/update_llc_test_checks.py; confirm.
define <4 x i1> @vmov_test14() { 7190 %a = bitcast i16 21845 to <16 x i1> 7191 %b = extractelement <16 x i1> %a, i32 2 7192 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1 7193 ret <4 x i1> %c 7194 } 7195
; vmov_test15: selects between two constant masks -- i16 21845 and i16 1, each
; bitcast to <16 x i1> -- based on the scalar compare %x sgt %y, and returns
; the chosen <16 x i1>.
7196 define <16 x i1> @vmov_test15(i32 %x, i32 %y) { 7197 ; GENERIC-LABEL: vmov_test15: 7198 ; GENERIC: # %bb.0: 7199 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7200 ; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555 7201 ; GENERIC-NEXT: # sched: [1:0.33] 7202 ; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33] 7203 ; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67] 7204 ; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33] 7205 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7206 ; GENERIC-NEXT: retq # sched: [1:1.00] 7207 ; 7208 ; SKX-LABEL: vmov_test15: 7209 ; SKX: # %bb.0: 7210 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7211 ; SKX-NEXT: movl $21845, %eax # imm = 0x5555 7212 ; SKX-NEXT: # sched: [1:0.25] 7213 ; SKX-NEXT: movl $1, %ecx # sched: [1:0.25] 7214 ; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50] 7215 ; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] 7216 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7217 ; SKX-NEXT: retq # sched: [7:1.00] 7218 %a = bitcast i16 21845 to <16 x i1> 7219 %b = bitcast i16 1 to <16 x i1> 7220 %mask = icmp sgt i32 %x, %y 7221 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b 7222 ret <16 x i1> %c 7223 } 7224 7225 define <64 x i8> @vmov_test16(i64 %x) { 7226 ; 7227 ; GENERIC-LABEL: vmov_test16: 7228 ; GENERIC: # %bb.0: 7229 ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] 7230 ; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] 7231 ; GENERIC-NEXT: 
kmovd %eax, %k1 # sched: [1:0.33] 7232 ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] 7233 ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] 7234 ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] 7235 ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] 7236 ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] 7237 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7238 ; GENERIC-NEXT: retq # sched: [1:1.00] 7239 ; 7240 ; SKX-LABEL: vmov_test16: 7241 ; SKX: # %bb.0: 7242 ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] 7243 ; SKX-NEXT: movb $1, %al # sched: [1:0.25] 7244 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7245 ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] 7246 ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] 7247 ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] 7248 ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] 7249 ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] 7250 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7251 ; SKX-NEXT: retq # sched: [7:1.00] 7252 %a = bitcast i64 %x to <64 x i1> 7253 %b = insertelement <64 x i1>%a, i1 true, i32 5 7254 %c = sext <64 x i1>%b to <64 x i8> 7255 ret <64 x i8>%c 7256 } 7257 7258 define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { 7259 ; 7260 ; GENERIC-LABEL: vmov_test17: 7261 ; GENERIC: # %bb.0: 7262 ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] 7263 ; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33] 7264 ; GENERIC-NEXT: setg %al # sched: [1:0.50] 7265 ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] 7266 ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] 7267 ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] 7268 ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] 7269 ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] 7270 ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] 7271 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7272 ; GENERIC-NEXT: retq # sched: [1:1.00] 7273 ; 7274 ; SKX-LABEL: vmov_test17: 7275 ; SKX: # %bb.0: 
7276 ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] 7277 ; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25] 7278 ; SKX-NEXT: setg %al # sched: [1:0.50] 7279 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7280 ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] 7281 ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] 7282 ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] 7283 ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] 7284 ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] 7285 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7286 ; SKX-NEXT: retq # sched: [7:1.00] 7287 %a = bitcast i64 %x to <64 x i1> 7288 %b = icmp sgt i32 %y, %z 7289 %c = insertelement <64 x i1>%a, i1 %b, i32 5 7290 %d = sext <64 x i1>%c to <64 x i8> 7291 ret <64 x i8>%d 7292 } 7293 7294 define <8 x i1> @vmov_test18(i8 %a, i16 %y) { 7295 ; GENERIC-LABEL: vmov_test18: 7296 ; GENERIC: # %bb.0: 7297 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 7298 ; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33] 7299 ; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00] 7300 ; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00] 7301 ; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00] 7302 ; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:0.33] 7303 ; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00] 7304 ; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00] 7305 ; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:0.33] 7306 ; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00] 7307 ; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] 7308 ; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00] 7309 ; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:0.33] 7310 ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] 7311 ; GENERIC-NEXT: retq # sched: [1:1.00] 7312 ; 7313 ; SKX-LABEL: vmov_test18: 7314 ; SKX: # %bb.0: 7315 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 7316 ; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00] 7317 ; SKX-NEXT: kshiftrw $8, %k2, %k0 # sched: [3:1.00] 7318 ; SKX-NEXT: kshiftrw 
$9, %k2, %k2 # sched: [3:1.00] 7319 ; SKX-NEXT: kshiftrb $6, %k1, %k3 # sched: [3:1.00] 7320 ; SKX-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00] 7321 ; SKX-NEXT: kshiftlb $7, %k2, %k2 # sched: [3:1.00] 7322 ; SKX-NEXT: kshiftrb $1, %k2, %k2 # sched: [3:1.00] 7323 ; SKX-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00] 7324 ; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00] 7325 ; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00] 7326 ; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00] 7327 ; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] 7328 ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] 7329 ; SKX-NEXT: retq # sched: [7:1.00] 7330 %b = bitcast i8 %a to <8 x i1> 7331 %b1 = bitcast i16 %y to <16 x i1> 7332 %el1 = extractelement <16 x i1>%b1, i32 8 7333 %el2 = extractelement <16 x i1>%b1, i32 9 7334 %c = insertelement <8 x i1>%b, i1 %el1, i32 7 7335 %d = insertelement <8 x i1>%c, i1 %el2, i32 6 7336 ret <8 x i1>%d 7337 } 7338 define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { 7339 ; GENERIC-LABEL: vmov_test21: 7340 ; GENERIC: # %bb.0: 7341 ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] 7342 ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] 7343 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 7344 ; GENERIC-NEXT: retq # sched: [1:1.00] 7345 ; 7346 ; SKX-LABEL: vmov_test21: 7347 ; SKX: # %bb.0: 7348 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] 7349 ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] 7350 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 7351 ; SKX-NEXT: retq # sched: [7:1.00] 7352 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 7353 ret <32 x i16> %ret 7354 } 7355 7356 define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { 7357 ; GENERIC-LABEL: vmov_test22: 7358 ; GENERIC: # %bb.0: 7359 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7360 ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7361 ; GENERIC-NEXT: kmovb %k0, (%rdi) # 
sched: [1:1.00] 7362 ; GENERIC-NEXT: retq # sched: [1:1.00] 7363 ; 7364 ; SKX-LABEL: vmov_test22: 7365 ; SKX: # %bb.0: 7366 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7367 ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7368 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7369 ; SKX-NEXT: retq # sched: [7:1.00] 7370 store <4 x i1> %a, <4 x i1>* %addr 7371 ret void 7372 } 7373 7374 define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { 7375 ; GENERIC-LABEL: vmov_test23: 7376 ; GENERIC: # %bb.0: 7377 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 7378 ; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] 7379 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7380 ; GENERIC-NEXT: retq # sched: [1:1.00] 7381 ; 7382 ; SKX-LABEL: vmov_test23: 7383 ; SKX: # %bb.0: 7384 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 7385 ; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] 7386 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7387 ; SKX-NEXT: retq # sched: [7:1.00] 7388 store <2 x i1> %a, <2 x i1>* %addr 7389 ret void 7390 } 7391 7392 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { 7393 ; GENERIC-LABEL: store_v1i1: 7394 ; GENERIC: # %bb.0: 7395 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 7396 ; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33] 7397 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 7398 ; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] 7399 ; GENERIC-NEXT: retq # sched: [1:1.00] 7400 ; 7401 ; SKX-LABEL: store_v1i1: 7402 ; SKX: # %bb.0: 7403 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 7404 ; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] 7405 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 7406 ; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] 7407 ; SKX-NEXT: retq # sched: [7:1.00] 7408 %x = xor <1 x i1> %c, <i1 1> 7409 store <1 x i1> %x, <1 x i1>* %ptr, align 4 7410 ret void 7411 } 7412 7413 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { 7414 ; GENERIC-LABEL: store_v2i1: 7415 ; GENERIC: 
# %bb.0: 7416 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 7417 ; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] 7418 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7419 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7420 ; GENERIC-NEXT: retq # sched: [1:1.00] 7421 ; 7422 ; SKX-LABEL: store_v2i1: 7423 ; SKX: # %bb.0: 7424 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 7425 ; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] 7426 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7427 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7428 ; SKX-NEXT: retq # sched: [7:1.00] 7429 %x = xor <2 x i1> %c, <i1 1, i1 1> 7430 store <2 x i1> %x, <2 x i1>* %ptr, align 4 7431 ret void 7432 } 7433 7434 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { 7435 ; GENERIC-LABEL: store_v4i1: 7436 ; GENERIC: # %bb.0: 7437 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7438 ; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7439 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7440 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7441 ; GENERIC-NEXT: retq # sched: [1:1.00] 7442 ; 7443 ; SKX-LABEL: store_v4i1: 7444 ; SKX: # %bb.0: 7445 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7446 ; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7447 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7448 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7449 ; SKX-NEXT: retq # sched: [7:1.00] 7450 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1> 7451 store <4 x i1> %x, <4 x i1>* %ptr, align 4 7452 ret void 7453 } 7454 7455 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { 7456 ; GENERIC-LABEL: store_v8i1: 7457 ; GENERIC: # %bb.0: 7458 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7459 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7460 ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] 7461 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7462 ; GENERIC-NEXT: retq # sched: [1:1.00] 7463 ; 7464 ; SKX-LABEL: store_v8i1: 7465 ; 
SKX: # %bb.0: 7466 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7467 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7468 ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] 7469 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7470 ; SKX-NEXT: retq # sched: [7:1.00] 7471 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 7472 store <8 x i1> %x, <8 x i1>* %ptr, align 4 7473 ret void 7474 } 7475 7476 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { 7477 ; GENERIC-LABEL: store_v16i1: 7478 ; GENERIC: # %bb.0: 7479 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7480 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7481 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7482 ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7483 ; GENERIC-NEXT: retq # sched: [1:1.00] 7484 ; 7485 ; SKX-LABEL: store_v16i1: 7486 ; SKX: # %bb.0: 7487 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7488 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7489 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7490 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7491 ; SKX-NEXT: retq # sched: [7:1.00] 7492 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 7493 store <16 x i1> %x, <16 x i1>* %ptr, align 4 7494 ret void 7495 } 7496 7497 ;void f2(int); 7498 ;void f1(int c) 7499 ;{ 7500 ; static int v = 0; 7501 ; if (v == 0) 7502 ; v = 1; 7503 ; else 7504 ; v = 0; 7505 ; f2(v); 7506 ;} 7507 7508 @f1.v = internal unnamed_addr global i1 false, align 4 7509 7510 define void @f1(i32 %c) { 7511 ; GENERIC-LABEL: f1: 7512 ; GENERIC: # %bb.0: # %entry 7513 ; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] 7514 ; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33] 7515 ; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] 7516 ; GENERIC-NEXT: jmp f2 # TAILCALL 7517 ; 7518 ; SKX-LABEL: f1: 7519 ; SKX: # %bb.0: # %entry 7520 ; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] 
7521 ; SKX-NEXT: xorl $1, %edi # sched: [1:0.25] 7522 ; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] 7523 ; SKX-NEXT: jmp f2 # TAILCALL 7524 entry: 7525 %.b1 = load i1, i1* @f1.v, align 4 7526 %not..b1 = xor i1 %.b1, true 7527 store i1 %not..b1, i1* @f1.v, align 4 7528 %0 = zext i1 %not..b1 to i32 7529 tail call void @f2(i32 %0) #2 7530 ret void 7531 } 7532 7533 declare void @f2(i32) #1 7534 7535 define void @store_i16_i1(i16 %x, i1 *%y) { 7536 ; GENERIC-LABEL: store_i16_i1: 7537 ; GENERIC: # %bb.0: 7538 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 7539 ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7540 ; GENERIC-NEXT: retq # sched: [1:1.00] 7541 ; 7542 ; SKX-LABEL: store_i16_i1: 7543 ; SKX: # %bb.0: 7544 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 7545 ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7546 ; SKX-NEXT: retq # sched: [7:1.00] 7547 %c = trunc i16 %x to i1 7548 store i1 %c, i1* %y 7549 ret void 7550 } 7551 7552 define void @store_i8_i1(i8 %x, i1 *%y) { 7553 ; GENERIC-LABEL: store_i8_i1: 7554 ; GENERIC: # %bb.0: 7555 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 7556 ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7557 ; GENERIC-NEXT: retq # sched: [1:1.00] 7558 ; 7559 ; SKX-LABEL: store_i8_i1: 7560 ; SKX: # %bb.0: 7561 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 7562 ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7563 ; SKX-NEXT: retq # sched: [7:1.00] 7564 %c = trunc i8 %x to i1 7565 store i1 %c, i1* %y 7566 ret void 7567 } 7568 7569 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { 7570 ; GENERIC-LABEL: test_build_vec_v32i1: 7571 ; GENERIC: # %bb.0: 7572 ; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495 7573 ; GENERIC-NEXT: # sched: [1:0.33] 7574 ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] 7575 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 7576 ; GENERIC-NEXT: retq # sched: [1:1.00] 7577 ; 7578 ; SKX-LABEL: test_build_vec_v32i1: 7579 ; SKX: # %bb.0: 7580 ; SKX-NEXT: movl $1497715861, 
%eax # imm = 0x59455495 7581 ; SKX-NEXT: # sched: [1:0.25] 7582 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7583 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 7584 ; SKX-NEXT: retq # sched: [7:1.00] 7585 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer 7586 ret <32 x i16> %ret 7587 } 7588 7589 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { 7590 ; GENERIC-LABEL: test_build_vec_v64i1: 7591 ; GENERIC: # %bb.0: 7592 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50] 7593 ; GENERIC-NEXT: retq # sched: [1:1.00] 7594 ; 7595 ; SKX-LABEL: test_build_vec_v64i1: 7596 ; SKX: # %bb.0: 7597 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00] 7598 ; SKX-NEXT: retq # sched: [7:1.00] 7599 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 
true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer 7600 ret <64 x i8> %ret 7601 } 7602 7603 define void @ktest_1(<8 x double> %in, double * %base) { 7604 ; GENERIC-LABEL: ktest_1: 7605 ; GENERIC: # %bb.0: 7606 ; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50] 7607 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] 7608 ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50] 7609 ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] 7610 ; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] 7611 ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] 7612 ; GENERIC-NEXT: # %bb.1: # %L1 7613 ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] 7614 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7615 ; GENERIC-NEXT: retq # sched: [1:1.00] 7616 ; GENERIC-NEXT: .LBB410_2: # %L2 7617 ; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] 7618 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7619 ; GENERIC-NEXT: retq # sched: [1:1.00] 7620 ; 7621 ; SKX-LABEL: ktest_1: 7622 ; SKX: # %bb.0: 7623 ; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50] 7624 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] 7625 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] 7626 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] 7627 ; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] 7628 ; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] 7629 ; SKX-NEXT: # %bb.1: # %L1 7630 ; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] 7631 ; SKX-NEXT: vzeroupper # sched: 
[4:1.00] 7632 ; SKX-NEXT: retq # sched: [7:1.00] 7633 ; SKX-NEXT: .LBB410_2: # %L2 7634 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] 7635 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7636 ; SKX-NEXT: retq # sched: [7:1.00] 7637 %addr1 = getelementptr double, double * %base, i64 0 7638 %addr2 = getelementptr double, double * %base, i64 1 7639 7640 %vaddr1 = bitcast double* %addr1 to <8 x double>* 7641 %vaddr2 = bitcast double* %addr2 to <8 x double>* 7642 7643 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 7644 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 7645 7646 %sel1 = fcmp ogt <8 x double>%in, %val1 7647 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer 7648 %sel2 = fcmp olt <8 x double> %in, %val3 7649 %sel3 = and <8 x i1> %sel1, %sel2 7650 7651 %int_sel3 = bitcast <8 x i1> %sel3 to i8 7652 %res = icmp eq i8 %int_sel3, zeroinitializer 7653 br i1 %res, label %L2, label %L1 7654 L1: 7655 store <8 x double> %in, <8 x double>* %vaddr1 7656 br label %End 7657 L2: 7658 store <8 x double> %in, <8 x double>* %vaddr2 7659 br label %End 7660 End: 7661 ret void 7662 } 7663 7664 define void @ktest_2(<32 x float> %in, float * %base) { 7665 ; 7666 ; GENERIC-LABEL: ktest_2: 7667 ; GENERIC: # %bb.0: 7668 ; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50] 7669 ; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50] 7670 ; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] 7671 ; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] 7672 ; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00] 7673 ; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50] 7674 ; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50] 7675 ; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] 7676 ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] 7677 ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] 7678 ; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] 7679 ; 
GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] 7680 ; GENERIC-NEXT: # %bb.1: # %L1 7681 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 7682 ; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] 7683 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7684 ; GENERIC-NEXT: retq # sched: [1:1.00] 7685 ; GENERIC-NEXT: .LBB411_2: # %L2 7686 ; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] 7687 ; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] 7688 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7689 ; GENERIC-NEXT: retq # sched: [1:1.00] 7690 ; 7691 ; SKX-LABEL: ktest_2: 7692 ; SKX: # %bb.0: 7693 ; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50] 7694 ; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] 7695 ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] 7696 ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] 7697 ; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] 7698 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] 7699 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50] 7700 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] 7701 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] 7702 ; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] 7703 ; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00] 7704 ; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] 7705 ; SKX-NEXT: # %bb.1: # %L1 7706 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 7707 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] 7708 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7709 ; SKX-NEXT: retq # sched: [7:1.00] 7710 ; SKX-NEXT: .LBB411_2: # %L2 7711 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] 7712 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] 7713 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7714 ; SKX-NEXT: retq # sched: [7:1.00] 7715 %addr1 = getelementptr float, float * %base, i64 0 7716 %addr2 = getelementptr float, float * %base, i64 1 7717 7718 %vaddr1 = bitcast float* %addr1 to <32 x float>* 7719 %vaddr2 = bitcast 
float* %addr2 to <32 x float>* 7720 7721 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 7722 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 7723 7724 %sel1 = fcmp ogt <32 x float>%in, %val1 7725 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer 7726 %sel2 = fcmp olt <32 x float> %in, %val3 7727 %sel3 = or <32 x i1> %sel1, %sel2 7728 7729 %int_sel3 = bitcast <32 x i1> %sel3 to i32 7730 %res = icmp eq i32 %int_sel3, zeroinitializer 7731 br i1 %res, label %L2, label %L1 7732 L1: 7733 store <32 x float> %in, <32 x float>* %vaddr1 7734 br label %End 7735 L2: 7736 store <32 x float> %in, <32 x float>* %vaddr2 7737 br label %End 7738 End: 7739 ret void 7740 } 7741 7742 define <8 x i64> @load_8i1(<8 x i1>* %a) { 7743 ; GENERIC-LABEL: load_8i1: 7744 ; GENERIC: # %bb.0: 7745 ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7746 ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] 7747 ; GENERIC-NEXT: retq # sched: [1:1.00] 7748 ; 7749 ; SKX-LABEL: load_8i1: 7750 ; SKX: # %bb.0: 7751 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7752 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 7753 ; SKX-NEXT: retq # sched: [7:1.00] 7754 %b = load <8 x i1>, <8 x i1>* %a 7755 %c = sext <8 x i1> %b to <8 x i64> 7756 ret <8 x i64> %c 7757 } 7758 7759 define <16 x i32> @load_16i1(<16 x i1>* %a) { 7760 ; GENERIC-LABEL: load_16i1: 7761 ; GENERIC: # %bb.0: 7762 ; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 7763 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 7764 ; GENERIC-NEXT: retq # sched: [1:1.00] 7765 ; 7766 ; SKX-LABEL: load_16i1: 7767 ; SKX: # %bb.0: 7768 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 7769 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 7770 ; SKX-NEXT: retq # sched: [7:1.00] 7771 %b = load <16 x i1>, <16 x i1>* %a 7772 %c = sext <16 x i1> %b to <16 x i32> 7773 ret <16 x i32> %c 7774 } 7775 7776 define <2 x i16> @load_2i1(<2 x i1>* %a) { 7777 ; GENERIC-LABEL: load_2i1: 7778 ; GENERIC: # %bb.0: 
7779 ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7780 ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] 7781 ; GENERIC-NEXT: retq # sched: [1:1.00] 7782 ; 7783 ; SKX-LABEL: load_2i1: 7784 ; SKX: # %bb.0: 7785 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7786 ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] 7787 ; SKX-NEXT: retq # sched: [7:1.00] 7788 %b = load <2 x i1>, <2 x i1>* %a 7789 %c = sext <2 x i1> %b to <2 x i16> 7790 ret <2 x i16> %c 7791 } 7792 7793 define <4 x i16> @load_4i1(<4 x i1>* %a) { 7794 ; GENERIC-LABEL: load_4i1: 7795 ; GENERIC: # %bb.0: 7796 ; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7797 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 7798 ; GENERIC-NEXT: retq # sched: [1:1.00] 7799 ; 7800 ; SKX-LABEL: load_4i1: 7801 ; SKX: # %bb.0: 7802 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7803 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 7804 ; SKX-NEXT: retq # sched: [7:1.00] 7805 %b = load <4 x i1>, <4 x i1>* %a 7806 %c = sext <4 x i1> %b to <4 x i16> 7807 ret <4 x i16> %c 7808 } 7809 7810 define <32 x i16> @load_32i1(<32 x i1>* %a) { 7811 ; GENERIC-LABEL: load_32i1: 7812 ; GENERIC: # %bb.0: 7813 ; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50] 7814 ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] 7815 ; GENERIC-NEXT: retq # sched: [1:1.00] 7816 ; 7817 ; SKX-LABEL: load_32i1: 7818 ; SKX: # %bb.0: 7819 ; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00] 7820 ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] 7821 ; SKX-NEXT: retq # sched: [7:1.00] 7822 %b = load <32 x i1>, <32 x i1>* %a 7823 %c = sext <32 x i1> %b to <32 x i16> 7824 ret <32 x i16> %c 7825 } 7826 7827 define <64 x i8> @load_64i1(<64 x i1>* %a) { 7828 ; GENERIC-LABEL: load_64i1: 7829 ; GENERIC: # %bb.0: 7830 ; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50] 7831 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7832 ; GENERIC-NEXT: retq # sched: [1:1.00] 7833 ; 7834 ; SKX-LABEL: load_64i1: 7835 ; SKX: # %bb.0: 7836 ; SKX-NEXT: kmovq 
(%rdi), %k0 # sched: [7:1.00] 7837 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7838 ; SKX-NEXT: retq # sched: [7:1.00] 7839 %b = load <64 x i1>, <64 x i1>* %a 7840 %c = sext <64 x i1> %b to <64 x i8> 7841 ret <64 x i8> %c 7842 } 7843 7844 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { 7845 ; GENERIC-LABEL: store_8i1: 7846 ; GENERIC: # %bb.0: 7847 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7848 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7849 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7850 ; GENERIC-NEXT: retq # sched: [1:1.00] 7851 ; 7852 ; SKX-LABEL: store_8i1: 7853 ; SKX: # %bb.0: 7854 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7855 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7856 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7857 ; SKX-NEXT: retq # sched: [7:1.00] 7858 store <8 x i1> %v, <8 x i1>* %a 7859 ret void 7860 } 7861 7862 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { 7863 ; GENERIC-LABEL: store_8i1_1: 7864 ; GENERIC: # %bb.0: 7865 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7866 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7867 ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7868 ; GENERIC-NEXT: retq # sched: [1:1.00] 7869 ; 7870 ; SKX-LABEL: store_8i1_1: 7871 ; SKX: # %bb.0: 7872 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7873 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7874 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7875 ; SKX-NEXT: retq # sched: [7:1.00] 7876 %v1 = trunc <8 x i16> %v to <8 x i1> 7877 store <8 x i1> %v1, <8 x i1>* %a 7878 ret void 7879 } 7880 7881 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { 7882 ; GENERIC-LABEL: store_16i1: 7883 ; GENERIC: # %bb.0: 7884 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7885 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7886 ; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7887 ; GENERIC-NEXT: retq # sched: [1:1.00] 7888 ; 7889 ; SKX-LABEL: 
store_16i1: 7890 ; SKX: # %bb.0: 7891 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7892 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7893 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7894 ; SKX-NEXT: retq # sched: [7:1.00] 7895 store <16 x i1> %v, <16 x i1>* %a 7896 ret void 7897 } 7898 7899 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { 7900 ; GENERIC-LABEL: store_32i1: 7901 ; GENERIC: # %bb.0: 7902 ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] 7903 ; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33] 7904 ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7905 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7906 ; GENERIC-NEXT: retq # sched: [1:1.00] 7907 ; 7908 ; SKX-LABEL: store_32i1: 7909 ; SKX: # %bb.0: 7910 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] 7911 ; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] 7912 ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7913 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7914 ; SKX-NEXT: retq # sched: [7:1.00] 7915 store <32 x i1> %v, <32 x i1>* %a 7916 ret void 7917 } 7918 7919 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { 7920 ; GENERIC-LABEL: store_32i1_1: 7921 ; GENERIC: # %bb.0: 7922 ; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] 7923 ; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] 7924 ; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7925 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7926 ; GENERIC-NEXT: retq # sched: [1:1.00] 7927 ; 7928 ; SKX-LABEL: store_32i1_1: 7929 ; SKX: # %bb.0: 7930 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] 7931 ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] 7932 ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7933 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7934 ; SKX-NEXT: retq # sched: [7:1.00] 7935 %v1 = trunc <32 x i16> %v to <32 x i1> 7936 store <32 x i1> %v1, <32 x i1>* %a 7937 ret void 7938 } 7939 7940 7941 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { 7942 ; 7943 ; GENERIC-LABEL: 
store_64i1: 7944 ; GENERIC: # %bb.0: 7945 ; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] 7946 ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] 7947 ; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] 7948 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7949 ; GENERIC-NEXT: retq # sched: [1:1.00] 7950 ; 7951 ; SKX-LABEL: store_64i1: 7952 ; SKX: # %bb.0: 7953 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] 7954 ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] 7955 ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] 7956 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7957 ; SKX-NEXT: retq # sched: [7:1.00] 7958 store <64 x i1> %v, <64 x i1>* %a 7959 ret void 7960 } 7961 7962 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { 7963 ; GENERIC-LABEL: test_bitcast_v8i1_zext: 7964 ; GENERIC: # %bb.0: 7965 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] 7966 ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] 7967 ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] 7968 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7969 ; GENERIC-NEXT: retq # sched: [1:1.00] 7970 ; 7971 ; SKX-LABEL: test_bitcast_v8i1_zext: 7972 ; SKX: # %bb.0: 7973 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] 7974 ; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00] 7975 ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] 7976 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 7977 ; SKX-NEXT: retq # sched: [7:1.00] 7978 %v1 = icmp eq <16 x i32> %a, zeroinitializer 7979 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7980 %mask1 = bitcast <8 x i1> %mask to i8 7981 %val = zext i8 %mask1 to i32 7982 %val1 = add i32 %val, %val 7983 ret i32 %val1 7984 } 7985 7986 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { 7987 ; GENERIC-LABEL: test_bitcast_v16i1_zext: 7988 ; GENERIC: # %bb.0: 7989 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] 7990 ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] 7991 ; GENERIC-NEXT: 
addl %eax, %eax # sched: [1:0.33] 7992 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7993 ; GENERIC-NEXT: retq # sched: [1:1.00] 7994 ; 7995 ; SKX-LABEL: test_bitcast_v16i1_zext: 7996 ; SKX: # %bb.0: 7997 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] 7998 ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] 7999 ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] 8000 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 8001 ; SKX-NEXT: retq # sched: [7:1.00] 8002 %v1 = icmp eq <16 x i32> %a, zeroinitializer 8003 %mask1 = bitcast <16 x i1> %v1 to i16 8004 %val = zext i16 %mask1 to i32 8005 %val1 = add i32 %val, %val 8006 ret i32 %val1 8007 } 8008 8009 define i16 @test_v16i1_add(i16 %x, i16 %y) { 8010 ; GENERIC-LABEL: test_v16i1_add: 8011 ; GENERIC: # %bb.0: 8012 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8013 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8014 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 8015 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8016 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8017 ; GENERIC-NEXT: retq # sched: [1:1.00] 8018 ; 8019 ; SKX-LABEL: test_v16i1_add: 8020 ; SKX: # %bb.0: 8021 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8022 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8023 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 8024 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8025 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8026 ; SKX-NEXT: retq # sched: [7:1.00] 8027 %m0 = bitcast i16 %x to <16 x i1> 8028 %m1 = bitcast i16 %y to <16 x i1> 8029 %m2 = add <16 x i1> %m0, %m1 8030 %ret = bitcast <16 x i1> %m2 to i16 8031 ret i16 %ret 8032 } 8033 8034 define i16 @test_v16i1_sub(i16 %x, i16 %y) { 8035 ; GENERIC-LABEL: test_v16i1_sub: 8036 ; GENERIC: # %bb.0: 8037 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8038 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8039 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 8040 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8041 ; GENERIC-NEXT: # kill: 
def $ax killed $ax killed $eax 8042 ; GENERIC-NEXT: retq # sched: [1:1.00] 8043 ; 8044 ; SKX-LABEL: test_v16i1_sub: 8045 ; SKX: # %bb.0: 8046 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8047 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8048 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 8049 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8050 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8051 ; SKX-NEXT: retq # sched: [7:1.00] 8052 %m0 = bitcast i16 %x to <16 x i1> 8053 %m1 = bitcast i16 %y to <16 x i1> 8054 %m2 = sub <16 x i1> %m0, %m1 8055 %ret = bitcast <16 x i1> %m2 to i16 8056 ret i16 %ret 8057 } 8058 8059 define i16 @test_v16i1_mul(i16 %x, i16 %y) { 8060 ; GENERIC-LABEL: test_v16i1_mul: 8061 ; GENERIC: # %bb.0: 8062 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8063 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8064 ; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33] 8065 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8066 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8067 ; GENERIC-NEXT: retq # sched: [1:1.00] 8068 ; 8069 ; SKX-LABEL: test_v16i1_mul: 8070 ; SKX: # %bb.0: 8071 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8072 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8073 ; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] 8074 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8075 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8076 ; SKX-NEXT: retq # sched: [7:1.00] 8077 %m0 = bitcast i16 %x to <16 x i1> 8078 %m1 = bitcast i16 %y to <16 x i1> 8079 %m2 = mul <16 x i1> %m0, %m1 8080 %ret = bitcast <16 x i1> %m2 to i16 8081 ret i16 %ret 8082 } 8083 8084 define i8 @test_v8i1_add(i8 %x, i8 %y) { 8085 ; GENERIC-LABEL: test_v8i1_add: 8086 ; GENERIC: # %bb.0: 8087 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8088 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8089 ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] 8090 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8091 ; GENERIC-NEXT: # kill: def $al killed $al 
killed $eax 8092 ; GENERIC-NEXT: retq # sched: [1:1.00] 8093 ; 8094 ; SKX-LABEL: test_v8i1_add: 8095 ; SKX: # %bb.0: 8096 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8097 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8098 ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] 8099 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8100 ; SKX-NEXT: # kill: def $al killed $al killed $eax 8101 ; SKX-NEXT: retq # sched: [7:1.00] 8102 %m0 = bitcast i8 %x to <8 x i1> 8103 %m1 = bitcast i8 %y to <8 x i1> 8104 %m2 = add <8 x i1> %m0, %m1 8105 %ret = bitcast <8 x i1> %m2 to i8 8106 ret i8 %ret 8107 } 8108 8109 define i8 @test_v8i1_sub(i8 %x, i8 %y) { 8110 ; GENERIC-LABEL: test_v8i1_sub: 8111 ; GENERIC: # %bb.0: 8112 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8113 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8114 ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] 8115 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8116 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax 8117 ; GENERIC-NEXT: retq # sched: [1:1.00] 8118 ; 8119 ; SKX-LABEL: test_v8i1_sub: 8120 ; SKX: # %bb.0: 8121 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8122 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8123 ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] 8124 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8125 ; SKX-NEXT: # kill: def $al killed $al killed $eax 8126 ; SKX-NEXT: retq # sched: [7:1.00] 8127 %m0 = bitcast i8 %x to <8 x i1> 8128 %m1 = bitcast i8 %y to <8 x i1> 8129 %m2 = sub <8 x i1> %m0, %m1 8130 %ret = bitcast <8 x i1> %m2 to i8 8131 ret i8 %ret 8132 } 8133 8134 define i8 @test_v8i1_mul(i8 %x, i8 %y) { 8135 ; GENERIC-LABEL: test_v8i1_mul: 8136 ; GENERIC: # %bb.0: 8137 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8138 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8139 ; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33] 8140 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8141 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax 8142 ; GENERIC-NEXT: retq # sched: 
[1:1.00] 8143 ; 8144 ; SKX-LABEL: test_v8i1_mul: 8145 ; SKX: # %bb.0: 8146 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8147 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8148 ; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] 8149 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8150 ; SKX-NEXT: # kill: def $al killed $al killed $eax 8151 ; SKX-NEXT: retq # sched: [7:1.00] 8152 %m0 = bitcast i8 %x to <8 x i1> 8153 %m1 = bitcast i8 %y to <8 x i1> 8154 %m2 = mul <8 x i1> %m0, %m1 8155 %ret = bitcast <8 x i1> %m2 to i8 8156 ret i8 %ret 8157 } 8158 8159 define <16 x i32> @_inreg16xi32(i32 %a) { 8160 ; GENERIC-LABEL: _inreg16xi32: 8161 ; GENERIC: # %bb.0: 8162 ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] 8163 ; GENERIC-NEXT: retq # sched: [1:1.00] 8164 ; 8165 ; SKX-LABEL: _inreg16xi32: 8166 ; SKX: # %bb.0: 8167 ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] 8168 ; SKX-NEXT: retq # sched: [7:1.00] 8169 %b = insertelement <16 x i32> undef, i32 %a, i32 0 8170 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer 8171 ret <16 x i32> %c 8172 } 8173 8174 define <8 x i64> @_inreg8xi64(i64 %a) { 8175 ; GENERIC-LABEL: _inreg8xi64: 8176 ; GENERIC: # %bb.0: 8177 ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] 8178 ; GENERIC-NEXT: retq # sched: [1:1.00] 8179 ; 8180 ; SKX-LABEL: _inreg8xi64: 8181 ; SKX: # %bb.0: 8182 ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] 8183 ; SKX-NEXT: retq # sched: [7:1.00] 8184 %b = insertelement <8 x i64> undef, i64 %a, i32 0 8185 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer 8186 ret <8 x i64> %c 8187 } 8188 8189 define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { 8190 ; GENERIC-LABEL: _ss16xfloat_v4: 8191 ; GENERIC: # %bb.0: 8192 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8193 ; GENERIC-NEXT: retq # sched: [1:1.00] 8194 ; 8195 ; SKX-LABEL: _ss16xfloat_v4: 8196 ; SKX: # %bb.0: 8197 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 
8198 ; SKX-NEXT: retq # sched: [7:1.00] 8199 %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer 8200 ret <16 x float> %b 8201 } 8202 8203 define <16 x float> @_inreg16xfloat(float %a) { 8204 ; GENERIC-LABEL: _inreg16xfloat: 8205 ; GENERIC: # %bb.0: 8206 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8207 ; GENERIC-NEXT: retq # sched: [1:1.00] 8208 ; 8209 ; SKX-LABEL: _inreg16xfloat: 8210 ; SKX: # %bb.0: 8211 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8212 ; SKX-NEXT: retq # sched: [7:1.00] 8213 %b = insertelement <16 x float> undef, float %a, i32 0 8214 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8215 ret <16 x float> %c 8216 } 8217 8218 define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { 8219 ; GENERIC-LABEL: _ss16xfloat_mask: 8220 ; GENERIC: # %bb.0: 8221 ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8222 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] 8223 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 8224 ; GENERIC-NEXT: retq # sched: [1:1.00] 8225 ; 8226 ; SKX-LABEL: _ss16xfloat_mask: 8227 ; SKX: # %bb.0: 8228 ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8229 ; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00] 8230 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 8231 ; SKX-NEXT: retq # sched: [7:1.00] 8232 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8233 %b = insertelement <16 x float> undef, float %a, i32 0 8234 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8235 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i 8236 ret <16 x float> %r 8237 } 8238 8239 define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { 8240 ; GENERIC-LABEL: _ss16xfloat_maskz: 8241 ; GENERIC: # %bb.0: 8242 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8243 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # 
sched: [1:1.00] 8244 ; GENERIC-NEXT: retq # sched: [1:1.00] 8245 ; 8246 ; SKX-LABEL: _ss16xfloat_maskz: 8247 ; SKX: # %bb.0: 8248 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8249 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 8250 ; SKX-NEXT: retq # sched: [7:1.00] 8251 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8252 %b = insertelement <16 x float> undef, float %a, i32 0 8253 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8254 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer 8255 ret <16 x float> %r 8256 } 8257 8258 define <16 x float> @_ss16xfloat_load(float* %a.ptr) { 8259 ; GENERIC-LABEL: _ss16xfloat_load: 8260 ; GENERIC: # %bb.0: 8261 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00] 8262 ; GENERIC-NEXT: retq # sched: [1:1.00] 8263 ; 8264 ; SKX-LABEL: _ss16xfloat_load: 8265 ; SKX: # %bb.0: 8266 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50] 8267 ; SKX-NEXT: retq # sched: [7:1.00] 8268 %a = load float, float* %a.ptr 8269 %b = insertelement <16 x float> undef, float %a, i32 0 8270 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8271 ret <16 x float> %c 8272 } 8273 8274 define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { 8275 ; GENERIC-LABEL: _ss16xfloat_mask_load: 8276 ; GENERIC: # %bb.0: 8277 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8278 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] 8279 ; GENERIC-NEXT: retq # sched: [1:1.00] 8280 ; 8281 ; SKX-LABEL: _ss16xfloat_mask_load: 8282 ; SKX: # %bb.0: 8283 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8284 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50] 8285 ; SKX-NEXT: retq # sched: [7:1.00] 8286 %a = load float, float* %a.ptr 8287 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8288 %b = insertelement <16 x float> undef, float %a, i32 0 8289 %c = 
shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8290 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i 8291 ret <16 x float> %r 8292 } 8293 8294 define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { 8295 ; GENERIC-LABEL: _ss16xfloat_maskz_load: 8296 ; GENERIC: # %bb.0: 8297 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8298 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 8299 ; GENERIC-NEXT: retq # sched: [1:1.00] 8300 ; 8301 ; SKX-LABEL: _ss16xfloat_maskz_load: 8302 ; SKX: # %bb.0: 8303 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8304 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 8305 ; SKX-NEXT: retq # sched: [7:1.00] 8306 %a = load float, float* %a.ptr 8307 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8308 %b = insertelement <16 x float> undef, float %a, i32 0 8309 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8310 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer 8311 ret <16 x float> %r 8312 } 8313 8314 define <8 x double> @_inreg8xdouble(double %a) { 8315 ; GENERIC-LABEL: _inreg8xdouble: 8316 ; GENERIC: # %bb.0: 8317 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8318 ; GENERIC-NEXT: retq # sched: [1:1.00] 8319 ; 8320 ; SKX-LABEL: _inreg8xdouble: 8321 ; SKX: # %bb.0: 8322 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8323 ; SKX-NEXT: retq # sched: [7:1.00] 8324 %b = insertelement <8 x double> undef, double %a, i32 0 8325 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8326 ret <8 x double> %c 8327 } 8328 8329 define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { 8330 ; GENERIC-LABEL: _sd8xdouble_mask: 8331 ; GENERIC: # %bb.0: 8332 ; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8333 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00] 8334 ; 
GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 8335 ; GENERIC-NEXT: retq # sched: [1:1.00] 8336 ; 8337 ; SKX-LABEL: _sd8xdouble_mask: 8338 ; SKX: # %bb.0: 8339 ; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8340 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00] 8341 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 8342 ; SKX-NEXT: retq # sched: [7:1.00] 8343 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8344 %b = insertelement <8 x double> undef, double %a, i32 0 8345 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8346 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i 8347 ret <8 x double> %r 8348 } 8349 8350 define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { 8351 ; GENERIC-LABEL: _sd8xdouble_maskz: 8352 ; GENERIC: # %bb.0: 8353 ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8354 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 8355 ; GENERIC-NEXT: retq # sched: [1:1.00] 8356 ; 8357 ; SKX-LABEL: _sd8xdouble_maskz: 8358 ; SKX: # %bb.0: 8359 ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8360 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 8361 ; SKX-NEXT: retq # sched: [7:1.00] 8362 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8363 %b = insertelement <8 x double> undef, double %a, i32 0 8364 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8365 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer 8366 ret <8 x double> %r 8367 } 8368 8369 define <8 x double> @_sd8xdouble_load(double* %a.ptr) { 8370 ; GENERIC-LABEL: _sd8xdouble_load: 8371 ; GENERIC: # %bb.0: 8372 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00] 8373 ; GENERIC-NEXT: retq # sched: [1:1.00] 8374 ; 8375 ; SKX-LABEL: _sd8xdouble_load: 8376 ; SKX: # %bb.0: 8377 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50] 8378 ; SKX-NEXT: retq # sched: [7:1.00] 8379 %a = load double, 
double* %a.ptr 8380 %b = insertelement <8 x double> undef, double %a, i32 0 8381 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8382 ret <8 x double> %c 8383 } 8384 8385 define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { 8386 ; GENERIC-LABEL: _sd8xdouble_mask_load: 8387 ; GENERIC: # %bb.0: 8388 ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8389 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00] 8390 ; GENERIC-NEXT: retq # sched: [1:1.00] 8391 ; 8392 ; SKX-LABEL: _sd8xdouble_mask_load: 8393 ; SKX: # %bb.0: 8394 ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8395 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 8396 ; SKX-NEXT: retq # sched: [7:1.00] 8397 %a = load double, double* %a.ptr 8398 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8399 %b = insertelement <8 x double> undef, double %a, i32 0 8400 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8401 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i 8402 ret <8 x double> %r 8403 } 8404 8405 define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { 8406 ; GENERIC-LABEL: _sd8xdouble_maskz_load: 8407 ; GENERIC: # %bb.0: 8408 ; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8409 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 8410 ; GENERIC-NEXT: retq # sched: [1:1.00] 8411 ; 8412 ; SKX-LABEL: _sd8xdouble_maskz_load: 8413 ; SKX: # %bb.0: 8414 ; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8415 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 8416 ; SKX-NEXT: retq # sched: [7:1.00] 8417 %a = load double, double* %a.ptr 8418 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8419 %b = insertelement <8 x double> undef, double %a, i32 0 8420 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8421 %r = select <8 x i1> %mask, <8 
x double> %c, <8 x double> zeroinitializer 8422 ret <8 x double> %r 8423 } 8424 8425 define <16 x i32> @_xmm16xi32(<16 x i32> %a) { 8426 ; GENERIC-LABEL: _xmm16xi32: 8427 ; GENERIC: # %bb.0: 8428 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8429 ; GENERIC-NEXT: retq # sched: [1:1.00] 8430 ; 8431 ; SKX-LABEL: _xmm16xi32: 8432 ; SKX: # %bb.0: 8433 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8434 ; SKX-NEXT: retq # sched: [7:1.00] 8435 %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer 8436 ret <16 x i32> %b 8437 } 8438 8439 define <16 x float> @_xmm16xfloat(<16 x float> %a) { 8440 ; GENERIC-LABEL: _xmm16xfloat: 8441 ; GENERIC: # %bb.0: 8442 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8443 ; GENERIC-NEXT: retq # sched: [1:1.00] 8444 ; 8445 ; SKX-LABEL: _xmm16xfloat: 8446 ; SKX: # %bb.0: 8447 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8448 ; SKX-NEXT: retq # sched: [7:1.00] 8449 %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer 8450 ret <16 x float> %b 8451 } 8452 8453 define <16 x i32> @test_vbroadcast() { 8454 ; GENERIC-LABEL: test_vbroadcast: 8455 ; GENERIC: # %bb.0: # %entry 8456 ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] 8457 ; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] 8458 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 8459 ; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] 8460 ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 8461 ; GENERIC-NEXT: retq # sched: [1:1.00] 8462 ; 8463 ; SKX-LABEL: test_vbroadcast: 8464 ; SKX: # %bb.0: # %entry 8465 ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 8466 ; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] 8467 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 8468 ; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00] 8469 ; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 8470 ; SKX-NEXT: retq # sched: [7:1.00] 8471 entry: 
8472 %0 = sext <16 x i1> zeroinitializer to <16 x i32> 8473 %1 = fcmp uno <16 x float> undef, zeroinitializer 8474 %2 = sext <16 x i1> %1 to <16 x i32> 8475 %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 8476 ret <16 x i32> %3 8477 } 8478 8479 ; We implement the set1 intrinsics with vector initializers. Verify that the 8480 ; IR generated will produce broadcasts at the end. 8481 define <8 x double> @test_set1_pd(double %d) #2 { 8482 ; GENERIC-LABEL: test_set1_pd: 8483 ; GENERIC: # %bb.0: # %entry 8484 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8485 ; GENERIC-NEXT: retq # sched: [1:1.00] 8486 ; 8487 ; SKX-LABEL: test_set1_pd: 8488 ; SKX: # %bb.0: # %entry 8489 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8490 ; SKX-NEXT: retq # sched: [7:1.00] 8491 entry: 8492 %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 8493 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1 8494 %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2 8495 %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3 8496 %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4 8497 %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5 8498 %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6 8499 %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7 8500 ret <8 x double> %vecinit7.i 8501 } 8502 8503 define <8 x i64> @test_set1_epi64(i64 %d) #2 { 8504 ; GENERIC-LABEL: test_set1_epi64: 8505 ; GENERIC: # %bb.0: # %entry 8506 ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] 8507 ; GENERIC-NEXT: retq # sched: [1:1.00] 8508 ; 8509 ; SKX-LABEL: test_set1_epi64: 8510 ; SKX: # %bb.0: # %entry 8511 ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] 8512 ; SKX-NEXT: retq # sched: [7:1.00] 8513 entry: 8514 %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0 8515 %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1 
8516 %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2 8517 %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3 8518 %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4 8519 %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5 8520 %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6 8521 %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7 8522 ret <8 x i64> %vecinit7.i 8523 } 8524 8525 define <16 x float> @test_set1_ps(float %f) #2 { 8526 ; GENERIC-LABEL: test_set1_ps: 8527 ; GENERIC: # %bb.0: # %entry 8528 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8529 ; GENERIC-NEXT: retq # sched: [1:1.00] 8530 ; 8531 ; SKX-LABEL: test_set1_ps: 8532 ; SKX: # %bb.0: # %entry 8533 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8534 ; SKX-NEXT: retq # sched: [7:1.00] 8535 entry: 8536 %vecinit.i = insertelement <16 x float> undef, float %f, i32 0 8537 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1 8538 %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2 8539 %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3 8540 %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4 8541 %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5 8542 %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6 8543 %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7 8544 %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8 8545 %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9 8546 %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10 8547 %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11 8548 %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12 8549 %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13 8550 %vecinit14.i = insertelement <16 x float> %vecinit13.i, float 
%f, i32 14 8551 %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15 8552 ret <16 x float> %vecinit15.i 8553 } 8554 8555 define <16 x i32> @test_set1_epi32(i32 %f) #2 { 8556 ; GENERIC-LABEL: test_set1_epi32: 8557 ; GENERIC: # %bb.0: # %entry 8558 ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] 8559 ; GENERIC-NEXT: retq # sched: [1:1.00] 8560 ; 8561 ; SKX-LABEL: test_set1_epi32: 8562 ; SKX: # %bb.0: # %entry 8563 ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] 8564 ; SKX-NEXT: retq # sched: [7:1.00] 8565 entry: 8566 %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0 8567 %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1 8568 %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2 8569 %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3 8570 %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4 8571 %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5 8572 %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6 8573 %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7 8574 %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8 8575 %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9 8576 %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10 8577 %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11 8578 %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12 8579 %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13 8580 %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14 8581 %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15 8582 ret <16 x i32> %vecinit15.i 8583 } 8584 8585 ; We implement the scalar broadcast intrinsics with vector initializers. 8586 ; Verify that the IR generated will produce the broadcast at the end. 
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
; GENERIC-LABEL: test_mm512_broadcastsd_pd:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_broadcastsd_pd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
  ; Pull out element 0 of the 128-bit argument, then write it into every
  ; lane of the 512-bit result one insertelement at a time.
  %elt0 = extractelement <2 x double> %a, i32 0
  %lane0 = insertelement <8 x double> undef, double %elt0, i32 0
  %lane1 = insertelement <8 x double> %lane0, double %elt0, i32 1
  %lane2 = insertelement <8 x double> %lane1, double %elt0, i32 2
  %lane3 = insertelement <8 x double> %lane2, double %elt0, i32 3
  %lane4 = insertelement <8 x double> %lane3, double %elt0, i32 4
  %lane5 = insertelement <8 x double> %lane4, double %elt0, i32 5
  %lane6 = insertelement <8 x double> %lane5, double %elt0, i32 6
  %lane7 = insertelement <8 x double> %lane6, double %elt0, i32 7
  ret <8 x double> %lane7
}

; Widening splat written as a shufflevector with an all-zero mask: element 0
; of the <8 x float> input is replicated into 16 lanes. The assertions expect
; a single vbroadcastss.
define <16 x float> @suff_test1(<8 x float> %a) {
; GENERIC-LABEL: suff_test1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: suff_test1:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %splat
}

; Same all-zero-mask splat for doubles: element 0 of the <4 x double> input is
; replicated into 8 lanes. The assertions expect a single vbroadcastsd.
define <8 x double> @suff_test2(<4 x double> %a) {
; GENERIC-LABEL: suff_test2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: suff_test2:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %splat
}

; Byte splat: element 0 of the <32 x i8> input is replicated into 64 lanes.
; The assertions expect a single vpbroadcastb.
define <64 x i8> @_invec32xi8(<32 x i8> %a) {
; GENERIC-LABEL: _invec32xi8:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec32xi8:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %splat
}

; 16-bit splat: element 0 of the <16 x i16> input is replicated into 32 lanes.
; The assertions expect a single vpbroadcastw.
define <32 x i16> @_invec16xi16(<16 x i16> %a) {
; GENERIC-LABEL: _invec16xi16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec16xi16:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}

; 32-bit integer splat: element 0 of the <8 x i32> input is replicated into
; 16 lanes. Note the assertions expect vbroadcastss here even though the
; element type is i32.
define <16 x i32> @_invec8xi32(<8 x i32> %a) {
; GENERIC-LABEL: _invec8xi32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec8xi32:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %splat
}

; 64-bit integer splat: element 0 of the <4 x i64> input is replicated into
; 8 lanes. Note the assertions expect vbroadcastsd here even though the
; element type is i64.
define <8 x i64> @_invec4xi64(<4 x i64> %a) {
; GENERIC-LABEL: _invec4xi64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec4xi64:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
  %splat = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %splat
}

; External callee used only to keep a value live across a call.
declare void @func_f32(float)

; The sum %x + %x is passed to @func_f32 and afterwards splatted to 16 lanes,
; so it has to survive the call. The assertions expect it to be stored to the
; stack before the call and the broadcast to read straight from that stack
; slot afterwards (a folded reload) rather than reloading into a register first.
define <16 x float> @broadcast_ss_spill(float %x) {
; GENERIC-LABEL: broadcast_ss_spill:
; GENERIC: # %bb.0:
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f32
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: broadcast_ss_spill:
; SKX: # %bb.0:
; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 32
; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; SKX-NEXT: callq func_f32
; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 8
; SKX-NEXT: retq # sched: [7:1.00]
  %sum = fadd float %x, %x
  call void @func_f32(float %sum)
  ; Seed lane 0, then replicate it with an all-zero shuffle mask.
  %seed = insertelement <16 x float> undef, float %sum, i32 0
  %splat = shufflevector <16 x float> %seed, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %splat
}

; External callee used only to keep a value live across a call.
declare void @func_f64(double)

; Double-precision version of the spill test: %x + %x is passed to @func_f64
; and then splatted to 8 lanes. The assertions expect a store to the stack
; before the call and a vbroadcastsd that loads directly from that slot.
define <8 x double> @broadcast_sd_spill(double %x) {
; GENERIC-LABEL: broadcast_sd_spill:
; GENERIC: # %bb.0:
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f64
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: broadcast_sd_spill:
; SKX: # %bb.0:
; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 32
; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; SKX-NEXT: callq func_f64
; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
; SKX-NEXT: .cfi_def_cfa_offset 8
; SKX-NEXT: retq # sched: [7:1.00]
  %sum = fadd double %x, %x
  call void @func_f64(double %sum)
  ; Seed lane 0, then replicate it with an all-zero shuffle mask.
  %seed = insertelement <8 x double> undef, double %sum, i32 0
  %splat = shufflevector <8 x double> %seed, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %splat
}