1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s 3 4 declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) 5 declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) 6 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) 7 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) 8 declare <8 x double> @llvm.floor.v8f64(<8 x double> %p) 9 declare <16 x float> @llvm.floor.v16f32(<16 x float> %p) 10 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 11 declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 12 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 13 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 14 declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 15 declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 16 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 17 declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 18 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 19 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 20 declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 21 declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 22 declare <2 x double> @llvm.rint.v2f64(<2 x double> %p) 23 declare <4 x float> @llvm.rint.v4f32(<4 x float> %p) 24 declare <4 x double> @llvm.rint.v4f64(<4 x double> %p) 25 declare <8 x float> @llvm.rint.v8f32(<8 x float> %p) 26 declare <8 x double> @llvm.rint.v8f64(<8 x double> %p) 27 declare <16 x float> @llvm.rint.v16f32(<16 x float> %p) 28 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 29 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 30 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 31 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 32 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 33 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 34 35 define <2 x double> @floor_v2f64(<2 x double> %p) { 
36 ; CHECK-LABEL: floor_v2f64: 37 ; CHECK: ## %bb.0: 38 ; CHECK-NEXT: vroundpd $9, %xmm0, %xmm0 39 ; CHECK-NEXT: retq 40 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 41 ret <2 x double> %t 42 } 43 44 define <4 x float> @floor_v4f32(<4 x float> %p) { 45 ; CHECK-LABEL: floor_v4f32: 46 ; CHECK: ## %bb.0: 47 ; CHECK-NEXT: vroundps $9, %xmm0, %xmm0 48 ; CHECK-NEXT: retq 49 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 50 ret <4 x float> %t 51 } 52 53 define <4 x double> @floor_v4f64(<4 x double> %p){ 54 ; CHECK-LABEL: floor_v4f64: 55 ; CHECK: ## %bb.0: 56 ; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0 57 ; CHECK-NEXT: retq 58 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 59 ret <4 x double> %t 60 } 61 62 define <8 x float> @floor_v8f32(<8 x float> %p) { 63 ; CHECK-LABEL: floor_v8f32: 64 ; CHECK: ## %bb.0: 65 ; CHECK-NEXT: vroundps $9, %ymm0, %ymm0 66 ; CHECK-NEXT: retq 67 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 68 ret <8 x float> %t 69 } 70 71 define <8 x double> @floor_v8f64(<8 x double> %p){ 72 ; CHECK-LABEL: floor_v8f64: 73 ; CHECK: ## %bb.0: 74 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0 75 ; CHECK-NEXT: retq 76 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 77 ret <8 x double> %t 78 } 79 80 define <16 x float> @floor_v16f32(<16 x float> %p) { 81 ; CHECK-LABEL: floor_v16f32: 82 ; CHECK: ## %bb.0: 83 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0 84 ; CHECK-NEXT: retq 85 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 86 ret <16 x float> %t 87 } 88 89 define <2 x double> @floor_v2f64_load(<2 x double>* %ptr) { 90 ; CHECK-LABEL: floor_v2f64_load: 91 ; CHECK: ## %bb.0: 92 ; CHECK-NEXT: vroundpd $9, (%rdi), %xmm0 93 ; CHECK-NEXT: retq 94 %p = load <2 x double>, <2 x double>* %ptr 95 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 96 ret <2 x double> %t 97 } 98 99 define <4 x float> @floor_v4f32_load(<4 x float>* %ptr) { 100 ; CHECK-LABEL: floor_v4f32_load: 101 ; CHECK: ## %bb.0: 102 ; CHECK-NEXT: 
vroundps $9, (%rdi), %xmm0 103 ; CHECK-NEXT: retq 104 %p = load <4 x float>, <4 x float>* %ptr 105 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 106 ret <4 x float> %t 107 } 108 109 define <4 x double> @floor_v4f64_load(<4 x double>* %ptr){ 110 ; CHECK-LABEL: floor_v4f64_load: 111 ; CHECK: ## %bb.0: 112 ; CHECK-NEXT: vroundpd $9, (%rdi), %ymm0 113 ; CHECK-NEXT: retq 114 %p = load <4 x double>, <4 x double>* %ptr 115 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 116 ret <4 x double> %t 117 } 118 119 define <8 x float> @floor_v8f32_load(<8 x float>* %ptr) { 120 ; CHECK-LABEL: floor_v8f32_load: 121 ; CHECK: ## %bb.0: 122 ; CHECK-NEXT: vroundps $9, (%rdi), %ymm0 123 ; CHECK-NEXT: retq 124 %p = load <8 x float>, <8 x float>* %ptr 125 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 126 ret <8 x float> %t 127 } 128 129 define <8 x double> @floor_v8f64_load(<8 x double>* %ptr){ 130 ; CHECK-LABEL: floor_v8f64_load: 131 ; CHECK: ## %bb.0: 132 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0 133 ; CHECK-NEXT: retq 134 %p = load <8 x double>, <8 x double>* %ptr 135 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 136 ret <8 x double> %t 137 } 138 139 define <16 x float> @floor_v16f32_load(<16 x float>* %ptr) { 140 ; CHECK-LABEL: floor_v16f32_load: 141 ; CHECK: ## %bb.0: 142 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0 143 ; CHECK-NEXT: retq 144 %p = load <16 x float>, <16 x float>* %ptr 145 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 146 ret <16 x float> %t 147 } 148 149 define <2 x double> @floor_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { 150 ; CHECK-LABEL: floor_v2f64_mask: 151 ; CHECK: ## %bb.0: 152 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 153 ; CHECK-NEXT: vrndscalepd $9, %xmm0, %xmm1 {%k1} 154 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 155 ; CHECK-NEXT: retq 156 %c = icmp eq <2 x i64> %cmp, zeroinitializer 157 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 158 %s = select <2 x i1> %c, <2 x 
double> %t, <2 x double> %passthru 159 ret <2 x double> %s 160 } 161 162 define <4 x float> @floor_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { 163 ; CHECK-LABEL: floor_v4f32_mask: 164 ; CHECK: ## %bb.0: 165 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 166 ; CHECK-NEXT: vrndscaleps $9, %xmm0, %xmm1 {%k1} 167 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 168 ; CHECK-NEXT: retq 169 %c = icmp eq <4 x i32> %cmp, zeroinitializer 170 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 171 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 172 ret <4 x float> %s 173 } 174 175 define <4 x double> @floor_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) { 176 ; CHECK-LABEL: floor_v4f64_mask: 177 ; CHECK: ## %bb.0: 178 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 179 ; CHECK-NEXT: vrndscalepd $9, %ymm0, %ymm1 {%k1} 180 ; CHECK-NEXT: vmovapd %ymm1, %ymm0 181 ; CHECK-NEXT: retq 182 %c = icmp eq <4 x i64> %cmp, zeroinitializer 183 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 184 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 185 ret <4 x double> %s 186 } 187 188 define <8 x float> @floor_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) { 189 ; CHECK-LABEL: floor_v8f32_mask: 190 ; CHECK: ## %bb.0: 191 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 192 ; CHECK-NEXT: vrndscaleps $9, %ymm0, %ymm1 {%k1} 193 ; CHECK-NEXT: vmovaps %ymm1, %ymm0 194 ; CHECK-NEXT: retq 195 %c = icmp eq <8 x i32> %cmp, zeroinitializer 196 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 197 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 198 ret <8 x float> %s 199 } 200 201 define <8 x double> @floor_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) { 202 ; CHECK-LABEL: floor_v8f64_mask: 203 ; CHECK: ## %bb.0: 204 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 205 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm1 {%k1} 206 ; CHECK-NEXT: vmovapd %zmm1, %zmm0 207 ; CHECK-NEXT: retq 208 %c = icmp eq 
<8 x i64> %cmp, zeroinitializer 209 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 210 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 211 ret <8 x double> %s 212 } 213 214 define <16 x float> @floor_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) { 215 ; CHECK-LABEL: floor_v16f32_mask: 216 ; CHECK: ## %bb.0: 217 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 218 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm1 {%k1} 219 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 220 ; CHECK-NEXT: retq 221 %c = icmp eq <16 x i32> %cmp, zeroinitializer 222 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 223 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 224 ret <16 x float> %s 225 } 226 227 define <2 x double> @floor_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) { 228 ; CHECK-LABEL: floor_v2f64_maskz: 229 ; CHECK: ## %bb.0: 230 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 231 ; CHECK-NEXT: vrndscalepd $9, %xmm0, %xmm0 {%k1} {z} 232 ; CHECK-NEXT: retq 233 %c = icmp eq <2 x i64> %cmp, zeroinitializer 234 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 235 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 236 ret <2 x double> %s 237 } 238 239 define <4 x float> @floor_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) { 240 ; CHECK-LABEL: floor_v4f32_maskz: 241 ; CHECK: ## %bb.0: 242 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 243 ; CHECK-NEXT: vrndscaleps $9, %xmm0, %xmm0 {%k1} {z} 244 ; CHECK-NEXT: retq 245 %c = icmp eq <4 x i32> %cmp, zeroinitializer 246 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 247 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 248 ret <4 x float> %s 249 } 250 251 define <4 x double> @floor_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) { 252 ; CHECK-LABEL: floor_v4f64_maskz: 253 ; CHECK: ## %bb.0: 254 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 255 ; CHECK-NEXT: vrndscalepd $9, %ymm0, %ymm0 {%k1} {z} 256 ; CHECK-NEXT: retq 257 %c = icmp eq <4 x i64> %cmp, 
zeroinitializer 258 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 259 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 260 ret <4 x double> %s 261 } 262 263 define <8 x float> @floor_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) { 264 ; CHECK-LABEL: floor_v8f32_maskz: 265 ; CHECK: ## %bb.0: 266 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 267 ; CHECK-NEXT: vrndscaleps $9, %ymm0, %ymm0 {%k1} {z} 268 ; CHECK-NEXT: retq 269 %c = icmp eq <8 x i32> %cmp, zeroinitializer 270 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 271 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 272 ret <8 x float> %s 273 } 274 275 define <8 x double> @floor_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) { 276 ; CHECK-LABEL: floor_v8f64_maskz: 277 ; CHECK: ## %bb.0: 278 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 279 ; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0 {%k1} {z} 280 ; CHECK-NEXT: retq 281 %c = icmp eq <8 x i64> %cmp, zeroinitializer 282 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 283 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 284 ret <8 x double> %s 285 } 286 287 define <16 x float> @floor_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) { 288 ; CHECK-LABEL: floor_v16f32_maskz: 289 ; CHECK: ## %bb.0: 290 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 291 ; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0 {%k1} {z} 292 ; CHECK-NEXT: retq 293 %c = icmp eq <16 x i32> %cmp, zeroinitializer 294 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 295 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 296 ret <16 x float> %s 297 } 298 299 define <2 x double> @floor_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 300 ; CHECK-LABEL: floor_v2f64_mask_load: 301 ; CHECK: ## %bb.0: 302 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 303 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %xmm0 {%k1} 304 ; CHECK-NEXT: retq 305 %c = icmp eq <2 x i64> %cmp, zeroinitializer 306 %p = 
load <2 x double>, <2 x double>* %ptr 307 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 308 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 309 ret <2 x double> %s 310 } 311 312 define <4 x float> @floor_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 313 ; CHECK-LABEL: floor_v4f32_mask_load: 314 ; CHECK: ## %bb.0: 315 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 316 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %xmm0 {%k1} 317 ; CHECK-NEXT: retq 318 %c = icmp eq <4 x i32> %cmp, zeroinitializer 319 %p = load <4 x float>, <4 x float>* %ptr 320 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 321 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 322 ret <4 x float> %s 323 } 324 325 define <4 x double> @floor_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 326 ; CHECK-LABEL: floor_v4f64_mask_load: 327 ; CHECK: ## %bb.0: 328 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 329 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %ymm0 {%k1} 330 ; CHECK-NEXT: retq 331 %c = icmp eq <4 x i64> %cmp, zeroinitializer 332 %p = load <4 x double>, <4 x double>* %ptr 333 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 334 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 335 ret <4 x double> %s 336 } 337 338 define <8 x float> @floor_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 339 ; CHECK-LABEL: floor_v8f32_mask_load: 340 ; CHECK: ## %bb.0: 341 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 342 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %ymm0 {%k1} 343 ; CHECK-NEXT: retq 344 %c = icmp eq <8 x i32> %cmp, zeroinitializer 345 %p = load <8 x float>, <8 x float>* %ptr 346 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 347 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 348 ret <8 x float> %s 349 } 350 351 define <8 x double> @floor_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 352 ; CHECK-LABEL: 
floor_v8f64_mask_load: 353 ; CHECK: ## %bb.0: 354 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 355 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0 {%k1} 356 ; CHECK-NEXT: retq 357 %c = icmp eq <8 x i64> %cmp, zeroinitializer 358 %p = load <8 x double>, <8 x double>* %ptr 359 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 360 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 361 ret <8 x double> %s 362 } 363 364 define <16 x float> @floor_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 365 ; CHECK-LABEL: floor_v16f32_mask_load: 366 ; CHECK: ## %bb.0: 367 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 368 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0 {%k1} 369 ; CHECK-NEXT: retq 370 %c = icmp eq <16 x i32> %cmp, zeroinitializer 371 %p = load <16 x float>, <16 x float>* %ptr 372 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 373 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 374 ret <16 x float> %s 375 } 376 377 define <2 x double> @floor_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { 378 ; CHECK-LABEL: floor_v2f64_maskz_load: 379 ; CHECK: ## %bb.0: 380 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 381 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %xmm0 {%k1} {z} 382 ; CHECK-NEXT: retq 383 %c = icmp eq <2 x i64> %cmp, zeroinitializer 384 %p = load <2 x double>, <2 x double>* %ptr 385 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 386 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 387 ret <2 x double> %s 388 } 389 390 define <4 x float> @floor_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { 391 ; CHECK-LABEL: floor_v4f32_maskz_load: 392 ; CHECK: ## %bb.0: 393 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 394 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %xmm0 {%k1} {z} 395 ; CHECK-NEXT: retq 396 %c = icmp eq <4 x i32> %cmp, zeroinitializer 397 %p = load <4 x float>, <4 x float>* %ptr 398 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 399 %s = select <4 x i1> %c, 
<4 x float> %t, <4 x float> zeroinitializer 400 ret <4 x float> %s 401 } 402 403 define <4 x double> @floor_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { 404 ; CHECK-LABEL: floor_v4f64_maskz_load: 405 ; CHECK: ## %bb.0: 406 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 407 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %ymm0 {%k1} {z} 408 ; CHECK-NEXT: retq 409 %c = icmp eq <4 x i64> %cmp, zeroinitializer 410 %p = load <4 x double>, <4 x double>* %ptr 411 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 412 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 413 ret <4 x double> %s 414 } 415 416 define <8 x float> @floor_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { 417 ; CHECK-LABEL: floor_v8f32_maskz_load: 418 ; CHECK: ## %bb.0: 419 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 420 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %ymm0 {%k1} {z} 421 ; CHECK-NEXT: retq 422 %c = icmp eq <8 x i32> %cmp, zeroinitializer 423 %p = load <8 x float>, <8 x float>* %ptr 424 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 425 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 426 ret <8 x float> %s 427 } 428 429 define <8 x double> @floor_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { 430 ; CHECK-LABEL: floor_v8f64_maskz_load: 431 ; CHECK: ## %bb.0: 432 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 433 ; CHECK-NEXT: vrndscalepd $9, (%rdi), %zmm0 {%k1} {z} 434 ; CHECK-NEXT: retq 435 %c = icmp eq <8 x i64> %cmp, zeroinitializer 436 %p = load <8 x double>, <8 x double>* %ptr 437 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 438 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 439 ret <8 x double> %s 440 } 441 442 define <16 x float> @floor_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { 443 ; CHECK-LABEL: floor_v16f32_maskz_load: 444 ; CHECK: ## %bb.0: 445 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 446 ; CHECK-NEXT: vrndscaleps $9, (%rdi), %zmm0 {%k1} {z} 447 ; CHECK-NEXT: retq 448 %c = 
icmp eq <16 x i32> %cmp, zeroinitializer 449 %p = load <16 x float>, <16 x float>* %ptr 450 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 451 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 452 ret <16 x float> %s 453 } 454 455 define <2 x double> @floor_v2f64_broadcast(double* %ptr) { 456 ; CHECK-LABEL: floor_v2f64_broadcast: 457 ; CHECK: ## %bb.0: 458 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0 459 ; CHECK-NEXT: retq 460 %ps = load double, double* %ptr 461 %pins = insertelement <2 x double> undef, double %ps, i32 0 462 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 463 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 464 ret <2 x double> %t 465 } 466 467 define <4 x float> @floor_v4f32_broadcast(float* %ptr) { 468 ; CHECK-LABEL: floor_v4f32_broadcast: 469 ; CHECK: ## %bb.0: 470 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0 471 ; CHECK-NEXT: retq 472 %ps = load float, float* %ptr 473 %pins = insertelement <4 x float> undef, float %ps, i32 0 474 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 475 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 476 ret <4 x float> %t 477 } 478 479 define <4 x double> @floor_v4f64_broadcast(double* %ptr){ 480 ; CHECK-LABEL: floor_v4f64_broadcast: 481 ; CHECK: ## %bb.0: 482 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0 483 ; CHECK-NEXT: retq 484 %ps = load double, double* %ptr 485 %pins = insertelement <4 x double> undef, double %ps, i32 0 486 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 487 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 488 ret <4 x double> %t 489 } 490 491 define <8 x float> @floor_v8f32_broadcast(float* %ptr) { 492 ; CHECK-LABEL: floor_v8f32_broadcast: 493 ; CHECK: ## %bb.0: 494 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0 495 ; CHECK-NEXT: retq 496 %ps = load float, float* %ptr 497 %pins = insertelement <8 x float> 
undef, float %ps, i32 0 498 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 499 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 500 ret <8 x float> %t 501 } 502 503 define <8 x double> @floor_v8f64_broadcast(double* %ptr){ 504 ; CHECK-LABEL: floor_v8f64_broadcast: 505 ; CHECK: ## %bb.0: 506 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0 507 ; CHECK-NEXT: retq 508 %ps = load double, double* %ptr 509 %pins = insertelement <8 x double> undef, double %ps, i32 0 510 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 511 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 512 ret <8 x double> %t 513 } 514 515 define <16 x float> @floor_v16f32_broadcast(float* %ptr) { 516 ; CHECK-LABEL: floor_v16f32_broadcast: 517 ; CHECK: ## %bb.0: 518 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0 519 ; CHECK-NEXT: retq 520 %ps = load float, float* %ptr 521 %pins = insertelement <16 x float> undef, float %ps, i32 0 522 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 523 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 524 ret <16 x float> %t 525 } 526 527 define <2 x double> @floor_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 528 ; CHECK-LABEL: floor_v2f64_mask_broadcast: 529 ; CHECK: ## %bb.0: 530 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 531 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} 532 ; CHECK-NEXT: retq 533 %c = icmp eq <2 x i64> %cmp, zeroinitializer 534 %ps = load double, double* %ptr 535 %pins = insertelement <2 x double> undef, double %ps, i32 0 536 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 537 %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) 538 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 539 ret <2 x double> %s 540 } 541 542 define <4 x float> @floor_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 
543 ; CHECK-LABEL: floor_v4f32_mask_broadcast: 544 ; CHECK: ## %bb.0: 545 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 546 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} 547 ; CHECK-NEXT: retq 548 %c = icmp eq <4 x i32> %cmp, zeroinitializer 549 %ps = load float, float* %ptr 550 %pins = insertelement <4 x float> undef, float %ps, i32 0 551 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 552 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 553 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 554 ret <4 x float> %s 555 } 556 557 define <4 x double> @floor_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 558 ; CHECK-LABEL: floor_v4f64_mask_broadcast: 559 ; CHECK: ## %bb.0: 560 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 561 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} 562 ; CHECK-NEXT: retq 563 %c = icmp eq <4 x i64> %cmp, zeroinitializer 564 %ps = load double, double* %ptr 565 %pins = insertelement <4 x double> undef, double %ps, i32 0 566 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 567 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 568 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 569 ret <4 x double> %s 570 } 571 572 define <8 x float> @floor_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 573 ; CHECK-LABEL: floor_v8f32_mask_broadcast: 574 ; CHECK: ## %bb.0: 575 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 576 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} 577 ; CHECK-NEXT: retq 578 %c = icmp eq <8 x i32> %cmp, zeroinitializer 579 %ps = load float, float* %ptr 580 %pins = insertelement <8 x float> undef, float %ps, i32 0 581 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 582 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 583 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 584 ret <8 x float> %s 585 } 586 
587 define <8 x double> @floor_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 588 ; CHECK-LABEL: floor_v8f64_mask_broadcast: 589 ; CHECK: ## %bb.0: 590 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 591 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} 592 ; CHECK-NEXT: retq 593 %c = icmp eq <8 x i64> %cmp, zeroinitializer 594 %ps = load double, double* %ptr 595 %pins = insertelement <8 x double> undef, double %ps, i32 0 596 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 597 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 598 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 599 ret <8 x double> %s 600 } 601 602 define <16 x float> @floor_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 603 ; CHECK-LABEL: floor_v16f32_mask_broadcast: 604 ; CHECK: ## %bb.0: 605 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 606 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} 607 ; CHECK-NEXT: retq 608 %c = icmp eq <16 x i32> %cmp, zeroinitializer 609 %ps = load float, float* %ptr 610 %pins = insertelement <16 x float> undef, float %ps, i32 0 611 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 612 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 613 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 614 ret <16 x float> %s 615 } 616 617 define <2 x double> @floor_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { 618 ; CHECK-LABEL: floor_v2f64_maskz_broadcast: 619 ; CHECK: ## %bb.0: 620 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 621 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} {z} 622 ; CHECK-NEXT: retq 623 %c = icmp eq <2 x i64> %cmp, zeroinitializer 624 %ps = load double, double* %ptr 625 %pins = insertelement <2 x double> undef, double %ps, i32 0 626 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 627 %t = call <2 x double> @llvm.floor.v2f64(<2 
x double> %p) 628 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 629 ret <2 x double> %s 630 } 631 632 define <4 x float> @floor_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { 633 ; CHECK-LABEL: floor_v4f32_maskz_broadcast: 634 ; CHECK: ## %bb.0: 635 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 636 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} {z} 637 ; CHECK-NEXT: retq 638 %c = icmp eq <4 x i32> %cmp, zeroinitializer 639 %ps = load float, float* %ptr 640 %pins = insertelement <4 x float> undef, float %ps, i32 0 641 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 642 %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) 643 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 644 ret <4 x float> %s 645 } 646 647 define <4 x double> @floor_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) { 648 ; CHECK-LABEL: floor_v4f64_maskz_broadcast: 649 ; CHECK: ## %bb.0: 650 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 651 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} {z} 652 ; CHECK-NEXT: retq 653 %c = icmp eq <4 x i64> %cmp, zeroinitializer 654 %ps = load double, double* %ptr 655 %pins = insertelement <4 x double> undef, double %ps, i32 0 656 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 657 %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) 658 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 659 ret <4 x double> %s 660 } 661 662 define <8 x float> @floor_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) { 663 ; CHECK-LABEL: floor_v8f32_maskz_broadcast: 664 ; CHECK: ## %bb.0: 665 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 666 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} {z} 667 ; CHECK-NEXT: retq 668 %c = icmp eq <8 x i32> %cmp, zeroinitializer 669 %ps = load float, float* %ptr 670 %pins = insertelement <8 x float> undef, float %ps, i32 0 671 %p = shufflevector <8 x float> %pins, <8 x float> undef, 
<8 x i32> zeroinitializer 672 %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) 673 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 674 ret <8 x float> %s 675 } 676 677 define <8 x double> @floor_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) { 678 ; CHECK-LABEL: floor_v8f64_maskz_broadcast: 679 ; CHECK: ## %bb.0: 680 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 681 ; CHECK-NEXT: vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} {z} 682 ; CHECK-NEXT: retq 683 %c = icmp eq <8 x i64> %cmp, zeroinitializer 684 %ps = load double, double* %ptr 685 %pins = insertelement <8 x double> undef, double %ps, i32 0 686 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 687 %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) 688 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 689 ret <8 x double> %s 690 } 691 692 define <16 x float> @floor_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) { 693 ; CHECK-LABEL: floor_v16f32_maskz_broadcast: 694 ; CHECK: ## %bb.0: 695 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 696 ; CHECK-NEXT: vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} {z} 697 ; CHECK-NEXT: retq 698 %c = icmp eq <16 x i32> %cmp, zeroinitializer 699 %ps = load float, float* %ptr 700 %pins = insertelement <16 x float> undef, float %ps, i32 0 701 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 702 %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) 703 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 704 ret <16 x float> %s 705 } 706 707 define <2 x double> @ceil_v2f64(<2 x double> %p) { 708 ; CHECK-LABEL: ceil_v2f64: 709 ; CHECK: ## %bb.0: 710 ; CHECK-NEXT: vroundpd $10, %xmm0, %xmm0 711 ; CHECK-NEXT: retq 712 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 713 ret <2 x double> %t 714 } 715 716 define <4 x float> @ceil_v4f32(<4 x float> %p) { 717 ; CHECK-LABEL: ceil_v4f32: 718 ; CHECK: ## %bb.0: 719 ; CHECK-NEXT: vroundps 
$10, %xmm0, %xmm0 720 ; CHECK-NEXT: retq 721 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 722 ret <4 x float> %t 723 } 724 725 define <4 x double> @ceil_v4f64(<4 x double> %p){ 726 ; CHECK-LABEL: ceil_v4f64: 727 ; CHECK: ## %bb.0: 728 ; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0 729 ; CHECK-NEXT: retq 730 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 731 ret <4 x double> %t 732 } 733 734 define <8 x float> @ceil_v8f32(<8 x float> %p) { 735 ; CHECK-LABEL: ceil_v8f32: 736 ; CHECK: ## %bb.0: 737 ; CHECK-NEXT: vroundps $10, %ymm0, %ymm0 738 ; CHECK-NEXT: retq 739 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 740 ret <8 x float> %t 741 } 742 743 define <8 x double> @ceil_v8f64(<8 x double> %p){ 744 ; CHECK-LABEL: ceil_v8f64: 745 ; CHECK: ## %bb.0: 746 ; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0 747 ; CHECK-NEXT: retq 748 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 749 ret <8 x double> %t 750 } 751 752 define <16 x float> @ceil_v16f32(<16 x float> %p) { 753 ; CHECK-LABEL: ceil_v16f32: 754 ; CHECK: ## %bb.0: 755 ; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0 756 ; CHECK-NEXT: retq 757 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 758 ret <16 x float> %t 759 } 760 761 define <2 x double> @ceil_v2f64_load(<2 x double>* %ptr) { 762 ; CHECK-LABEL: ceil_v2f64_load: 763 ; CHECK: ## %bb.0: 764 ; CHECK-NEXT: vroundpd $10, (%rdi), %xmm0 765 ; CHECK-NEXT: retq 766 %p = load <2 x double>, <2 x double>* %ptr 767 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 768 ret <2 x double> %t 769 } 770 771 define <4 x float> @ceil_v4f32_load(<4 x float>* %ptr) { 772 ; CHECK-LABEL: ceil_v4f32_load: 773 ; CHECK: ## %bb.0: 774 ; CHECK-NEXT: vroundps $10, (%rdi), %xmm0 775 ; CHECK-NEXT: retq 776 %p = load <4 x float>, <4 x float>* %ptr 777 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 778 ret <4 x float> %t 779 } 780 781 define <4 x double> @ceil_v4f64_load(<4 x double>* %ptr){ 782 ; CHECK-LABEL: ceil_v4f64_load: 783 ; CHECK: ## 
%bb.0: 784 ; CHECK-NEXT: vroundpd $10, (%rdi), %ymm0 785 ; CHECK-NEXT: retq 786 %p = load <4 x double>, <4 x double>* %ptr 787 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 788 ret <4 x double> %t 789 } 790 791 define <8 x float> @ceil_v8f32_load(<8 x float>* %ptr) { 792 ; CHECK-LABEL: ceil_v8f32_load: 793 ; CHECK: ## %bb.0: 794 ; CHECK-NEXT: vroundps $10, (%rdi), %ymm0 795 ; CHECK-NEXT: retq 796 %p = load <8 x float>, <8 x float>* %ptr 797 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 798 ret <8 x float> %t 799 } 800 801 define <8 x double> @ceil_v8f64_load(<8 x double>* %ptr){ 802 ; CHECK-LABEL: ceil_v8f64_load: 803 ; CHECK: ## %bb.0: 804 ; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0 805 ; CHECK-NEXT: retq 806 %p = load <8 x double>, <8 x double>* %ptr 807 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 808 ret <8 x double> %t 809 } 810 811 define <16 x float> @ceil_v16f32_load(<16 x float>* %ptr) { 812 ; CHECK-LABEL: ceil_v16f32_load: 813 ; CHECK: ## %bb.0: 814 ; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0 815 ; CHECK-NEXT: retq 816 %p = load <16 x float>, <16 x float>* %ptr 817 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 818 ret <16 x float> %t 819 } 820 821 define <2 x double> @ceil_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { 822 ; CHECK-LABEL: ceil_v2f64_mask: 823 ; CHECK: ## %bb.0: 824 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 825 ; CHECK-NEXT: vrndscalepd $10, %xmm0, %xmm1 {%k1} 826 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 827 ; CHECK-NEXT: retq 828 %c = icmp eq <2 x i64> %cmp, zeroinitializer 829 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 830 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 831 ret <2 x double> %s 832 } 833 834 define <4 x float> @ceil_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { 835 ; CHECK-LABEL: ceil_v4f32_mask: 836 ; CHECK: ## %bb.0: 837 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 838 ; CHECK-NEXT: vrndscaleps $10, %xmm0, 
%xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

; ceil of %p in lanes where %cmp is zero, %passthru elsewhere.
define <4 x double> @ceil_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %sel
}

; ceil of %p in lanes where %cmp is zero, %passthru elsewhere.
define <8 x float> @ceil_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %rnd = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %sel
}

; ceil of %p in lanes where %cmp is zero, %passthru elsewhere.
define <8 x double> @ceil_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i64> %cmp, zeroinitializer
  %rnd = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  %sel = select <8 x i1> %iszero, <8 x double> %rnd, <8 x double> %passthru
  ret <8 x double> %sel
}

define <16 x float> @ceil_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL:
ceil_v16f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; ceil of %p in lanes where %cmp is zero, zero elsewhere.
define <2 x double> @ceil_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %rnd = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %sel
}

; ceil of %p in lanes where %cmp is zero, zero elsewhere.
define <4 x float> @ceil_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i32> %cmp, zeroinitializer
  %rnd = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %sel = select <4 x i1> %iszero, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

; ceil of %p in lanes where %cmp is zero, zero elsewhere.
define <4 x double> @ceil_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %sel
}

define <8 x float> @ceil_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

; ceil of %p in lanes where %cmp is zero, zero elsewhere.
define <8 x double> @ceil_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i64> %cmp, zeroinitializer
  %rnd = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  %sel = select <8 x i1> %iszero, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %sel
}

; ceil of %p in lanes where %cmp is zero, zero elsewhere.
define <16 x float> @ceil_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %sel
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <2 x double> @ceil_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.ceil.v2f64(<2 x double> %vec)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %sel
}

define <4 x float> @ceil_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL:
ceil_v4f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <4 x double> @ceil_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %vec)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %sel
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <8 x float> @ceil_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.ceil.v8f32(<8 x float> %vec)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %sel
}

define <8 x double> @ceil_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x
double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <16 x float> @ceil_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.ceil.v16f32(<16 x float> %vec)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %sel
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, zero elsewhere.
define <2 x double> @ceil_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.ceil.v2f64(<2 x double> %vec)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %sel
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, zero elsewhere.
define <4 x float> @ceil_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i32> %cmp, zeroinitializer
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.ceil.v4f32(<4 x float> %vec)
  %sel = select <4 x i1> %iszero, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

define <4 x double> @ceil_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, zero elsewhere.
define <8 x float> @ceil_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.ceil.v8f32(<8 x float> %vec)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %sel
}

; ceil of the vector loaded from %ptr in lanes where %cmp is zero, zero elsewhere.
define <8 x double> @ceil_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i64> %cmp, zeroinitializer
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.ceil.v8f64(<8 x double> %vec)
  %sel = select <8 x i1> %iszero, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %sel
}

define <16 x float> @ceil_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret
<16 x float> %s
}

; ceil of a <2 x double> splat of the scalar loaded from %ptr.
define <2 x double> @ceil_v2f64_broadcast(double* %ptr) {
; CHECK-LABEL: ceil_v2f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0
; CHECK-NEXT:    retq
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.ceil.v2f64(<2 x double> %splat)
  ret <2 x double> %rnd
}

; ceil of a <4 x float> splat of the scalar loaded from %ptr.
define <4 x float> @ceil_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: ceil_v4f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0
; CHECK-NEXT:    retq
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.ceil.v4f32(<4 x float> %splat)
  ret <4 x float> %rnd
}

; ceil of a <4 x double> splat of the scalar loaded from %ptr.
define <4 x double> @ceil_v4f64_broadcast(double* %ptr) {
; CHECK-LABEL: ceil_v4f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0
; CHECK-NEXT:    retq
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %splat = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %splat)
  ret <4 x double> %rnd
}

; ceil of a <8 x float> splat of the scalar loaded from %ptr.
define <8 x float> @ceil_v8f32_broadcast(float* %ptr) {
; CHECK-LABEL: ceil_v8f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0
; CHECK-NEXT:    retq
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.ceil.v8f32(<8 x float> %splat)
  ret <8 x float> %rnd
}
; ceil of a <8 x double> splat of the scalar loaded from %ptr.
define <8 x double> @ceil_v8f64_broadcast(double* %ptr) {
; CHECK-LABEL: ceil_v8f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0
; CHECK-NEXT:    retq
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %splat = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.ceil.v8f64(<8 x double> %splat)
  ret <8 x double> %rnd
}

; ceil of a <16 x float> splat of the scalar loaded from %ptr.
define <16 x float> @ceil_v16f32_broadcast(float* %ptr) {
; CHECK-LABEL: ceil_v16f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0
; CHECK-NEXT:    retq
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %splat = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.ceil.v16f32(<16 x float> %splat)
  ret <16 x float> %rnd
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <2 x double> @ceil_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.ceil.v2f64(<2 x double> %splat)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %sel
}

define <4 x float> @ceil_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $10,
(%rdi){1to4}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <4 x double> @ceil_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %splat = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %splat)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %sel
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <8 x float> @ceil_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %splat = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.ceil.v8f32(<8 x float> %splat)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %sel
}

define <8 x double> @ceil_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
;
CHECK-LABEL: ceil_v8f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <8 x double> undef, double %ps, i32 0
  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, %passthru elsewhere.
define <16 x float> @ceil_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %splat = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.ceil.v16f32(<16 x float> %splat)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %sel
}

define <2 x double> @ceil_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <2 x double> undef, double %ps, i32 0
  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double>
zeroinitializer
  ret <2 x double> %s
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, zero elsewhere.
define <4 x float> @ceil_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.ceil.v4f32(<4 x float> %splat)
  %sel = select <4 x i1> %iszero, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, zero elsewhere.
define <4 x double> @ceil_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %splat = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.ceil.v4f64(<4 x double> %splat)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %sel
}

define <8 x float> @ceil_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <8 x float> undef, float %ps, i32 0
  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
  %t =
call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, zero elsewhere.
define <8 x double> @ceil_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %splat = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.ceil.v8f64(<8 x double> %splat)
  %sel = select <8 x i1> %iszero, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %sel
}

; ceil of the splatted scalar from %ptr in lanes where %cmp is zero, zero elsewhere.
define <16 x float> @ceil_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %splat = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.ceil.v16f32(<16 x float> %splat)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %sel
}

; trunc of a <2 x double> register operand.
define <2 x double> @trunc_v2f64(<2 x double> %p) {
; CHECK-LABEL: trunc_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %rnd
}

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; CHECK-LABEL: trunc_v4f32:
; CHECK:       ## %bb.0:
;
CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

; trunc of a <4 x double> register operand.
define <4 x double> @trunc_v4f64(<4 x double> %p) {
; CHECK-LABEL: trunc_v4f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %rnd
}

; trunc of a <8 x float> register operand.
define <8 x float> @trunc_v8f32(<8 x float> %p) {
; CHECK-LABEL: trunc_v8f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %rnd
}

; trunc of a <8 x double> register operand.
define <8 x double> @trunc_v8f64(<8 x double> %p) {
; CHECK-LABEL: trunc_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
  ret <8 x double> %rnd
}

; trunc of a <16 x float> register operand.
define <16 x float> @trunc_v16f32(<16 x float> %p) {
; CHECK-LABEL: trunc_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  ret <16 x float> %rnd
}

; trunc of a <2 x double> vector loaded from %ptr.
define <2 x double> @trunc_v2f64_load(<2 x double>* %ptr) {
; CHECK-LABEL: trunc_v2f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $11, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  ret <2 x double> %rnd
}

; trunc of a <4 x float> vector loaded from %ptr.
define <4 x float> @trunc_v4f32_load(<4 x float>* %ptr) {
; CHECK-LABEL: trunc_v4f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  ret <4 x float> %rnd
}

define <4 x
double> @trunc_v4f64_load(<4 x double>* %ptr){
; CHECK-LABEL: trunc_v4f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

; trunc of a <8 x float> vector loaded from %ptr.
define <8 x float> @trunc_v8f32_load(<8 x float>* %ptr) {
; CHECK-LABEL: trunc_v8f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  ret <8 x float> %rnd
}

; trunc of a <8 x double> vector loaded from %ptr.
define <8 x double> @trunc_v8f64_load(<8 x double>* %ptr) {
; CHECK-LABEL: trunc_v8f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  ret <8 x double> %rnd
}

; trunc of a <16 x float> vector loaded from %ptr.
define <16 x float> @trunc_v16f32_load(<16 x float>* %ptr) {
; CHECK-LABEL: trunc_v16f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  ret <16 x float> %rnd
}

; trunc of %p in lanes where %cmp is zero, %passthru elsewhere.
define <2 x double> @trunc_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %sel
}

define <4 x float> @trunc_v4f32_mask(<4 x float> %p, <4 x float> %passthru,
<4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

; trunc of %p in lanes where %cmp is zero, %passthru elsewhere.
define <4 x double> @trunc_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i64> %cmp, zeroinitializer
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  %sel = select <4 x i1> %iszero, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %sel
}

; trunc of %p in lanes where %cmp is zero, %passthru elsewhere.
define <8 x float> @trunc_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %sel
}

define <8 x double> @trunc_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
%s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

; trunc of %p in lanes where %cmp is zero, %passthru elsewhere.
define <16 x float> @trunc_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %sel
}

; trunc of %p in lanes where %cmp is zero, zero elsewhere.
define <2 x double> @trunc_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <2 x i64> %cmp, zeroinitializer
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  %sel = select <2 x i1> %iszero, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %sel
}

; trunc of %p in lanes where %cmp is zero, zero elsewhere.
define <4 x float> @trunc_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <4 x i32> %cmp, zeroinitializer
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  %sel = select <4 x i1> %iszero, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %sel
}

define <4 x double> @trunc_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x
double> @llvm.trunc.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

; trunc of %p in lanes where %cmp is zero, zero elsewhere.
define <8 x float> @trunc_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i32> %cmp, zeroinitializer
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  %sel = select <8 x i1> %iszero, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %sel
}

; trunc of %p in lanes where %cmp is zero, zero elsewhere.
define <8 x double> @trunc_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <8 x i64> %cmp, zeroinitializer
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
  %sel = select <8 x i1> %iszero, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %sel
}

; trunc of %p in lanes where %cmp is zero, zero elsewhere.
define <16 x float> @trunc_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %iszero = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  %sel = select <16 x i1> %iszero, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %sel
}

define <2 x double> @trunc_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $11, (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp,
zeroinitializer 1650 %p = load <2 x double>, <2 x double>* %ptr 1651 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1652 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 1653 ret <2 x double> %s 1654 } 1655 1656 define <4 x float> @trunc_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 1657 ; CHECK-LABEL: trunc_v4f32_mask_load: 1658 ; CHECK: ## %bb.0: 1659 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 1660 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1} 1661 ; CHECK-NEXT: retq 1662 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1663 %p = load <4 x float>, <4 x float>* %ptr 1664 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1665 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 1666 ret <4 x float> %s 1667 } 1668 1669 define <4 x double> @trunc_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 1670 ; CHECK-LABEL: trunc_v4f64_mask_load: 1671 ; CHECK: ## %bb.0: 1672 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1673 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1} 1674 ; CHECK-NEXT: retq 1675 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1676 %p = load <4 x double>, <4 x double>* %ptr 1677 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1678 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 1679 ret <4 x double> %s 1680 } 1681 1682 define <8 x float> @trunc_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 1683 ; CHECK-LABEL: trunc_v8f32_mask_load: 1684 ; CHECK: ## %bb.0: 1685 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1686 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1} 1687 ; CHECK-NEXT: retq 1688 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1689 %p = load <8 x float>, <8 x float>* %ptr 1690 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1691 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 1692 ret <8 x float> %s 1693 } 1694 1695 define <8 x double> @trunc_v8f64_mask_load(<8 x double>* %ptr, 
<8 x double> %passthru, <8 x i64> %cmp) { 1696 ; CHECK-LABEL: trunc_v8f64_mask_load: 1697 ; CHECK: ## %bb.0: 1698 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1699 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1} 1700 ; CHECK-NEXT: retq 1701 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1702 %p = load <8 x double>, <8 x double>* %ptr 1703 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1704 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 1705 ret <8 x double> %s 1706 } 1707 1708 define <16 x float> @trunc_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 1709 ; CHECK-LABEL: trunc_v16f32_mask_load: 1710 ; CHECK: ## %bb.0: 1711 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1712 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1} 1713 ; CHECK-NEXT: retq 1714 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1715 %p = load <16 x float>, <16 x float>* %ptr 1716 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1717 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 1718 ret <16 x float> %s 1719 } 1720 1721 define <2 x double> @trunc_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { 1722 ; CHECK-LABEL: trunc_v2f64_maskz_load: 1723 ; CHECK: ## %bb.0: 1724 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1725 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %xmm0 {%k1} {z} 1726 ; CHECK-NEXT: retq 1727 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1728 %p = load <2 x double>, <2 x double>* %ptr 1729 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1730 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 1731 ret <2 x double> %s 1732 } 1733 1734 define <4 x float> @trunc_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { 1735 ; CHECK-LABEL: trunc_v4f32_maskz_load: 1736 ; CHECK: ## %bb.0: 1737 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 1738 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1} {z} 1739 ; CHECK-NEXT: retq 1740 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1741 %p = load <4 x 
float>, <4 x float>* %ptr 1742 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1743 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 1744 ret <4 x float> %s 1745 } 1746 1747 define <4 x double> @trunc_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { 1748 ; CHECK-LABEL: trunc_v4f64_maskz_load: 1749 ; CHECK: ## %bb.0: 1750 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1751 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1} {z} 1752 ; CHECK-NEXT: retq 1753 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1754 %p = load <4 x double>, <4 x double>* %ptr 1755 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1756 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 1757 ret <4 x double> %s 1758 } 1759 1760 define <8 x float> @trunc_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { 1761 ; CHECK-LABEL: trunc_v8f32_maskz_load: 1762 ; CHECK: ## %bb.0: 1763 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 1764 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1} {z} 1765 ; CHECK-NEXT: retq 1766 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1767 %p = load <8 x float>, <8 x float>* %ptr 1768 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1769 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 1770 ret <8 x float> %s 1771 } 1772 1773 define <8 x double> @trunc_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { 1774 ; CHECK-LABEL: trunc_v8f64_maskz_load: 1775 ; CHECK: ## %bb.0: 1776 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1777 ; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1} {z} 1778 ; CHECK-NEXT: retq 1779 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1780 %p = load <8 x double>, <8 x double>* %ptr 1781 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1782 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 1783 ret <8 x double> %s 1784 } 1785 1786 define <16 x float> @trunc_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { 1787 ; CHECK-LABEL: 
trunc_v16f32_maskz_load: 1788 ; CHECK: ## %bb.0: 1789 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1790 ; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1} {z} 1791 ; CHECK-NEXT: retq 1792 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1793 %p = load <16 x float>, <16 x float>* %ptr 1794 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1795 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 1796 ret <16 x float> %s 1797 } 1798 1799 define <2 x double> @trunc_v2f64_broadcast(double* %ptr) { 1800 ; CHECK-LABEL: trunc_v2f64_broadcast: 1801 ; CHECK: ## %bb.0: 1802 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 1803 ; CHECK-NEXT: retq 1804 %ps = load double, double* %ptr 1805 %pins = insertelement <2 x double> undef, double %ps, i32 0 1806 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 1807 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1808 ret <2 x double> %t 1809 } 1810 1811 define <4 x float> @trunc_v4f32_broadcast(float* %ptr) { 1812 ; CHECK-LABEL: trunc_v4f32_broadcast: 1813 ; CHECK: ## %bb.0: 1814 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 1815 ; CHECK-NEXT: retq 1816 %ps = load float, float* %ptr 1817 %pins = insertelement <4 x float> undef, float %ps, i32 0 1818 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1819 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1820 ret <4 x float> %t 1821 } 1822 1823 define <4 x double> @trunc_v4f64_broadcast(double* %ptr){ 1824 ; CHECK-LABEL: trunc_v4f64_broadcast: 1825 ; CHECK: ## %bb.0: 1826 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 1827 ; CHECK-NEXT: retq 1828 %ps = load double, double* %ptr 1829 %pins = insertelement <4 x double> undef, double %ps, i32 0 1830 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 1831 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1832 ret <4 x double> %t 1833 } 1834 1835 define <8 x float> 
@trunc_v8f32_broadcast(float* %ptr) { 1836 ; CHECK-LABEL: trunc_v8f32_broadcast: 1837 ; CHECK: ## %bb.0: 1838 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 1839 ; CHECK-NEXT: retq 1840 %ps = load float, float* %ptr 1841 %pins = insertelement <8 x float> undef, float %ps, i32 0 1842 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 1843 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1844 ret <8 x float> %t 1845 } 1846 1847 define <8 x double> @trunc_v8f64_broadcast(double* %ptr){ 1848 ; CHECK-LABEL: trunc_v8f64_broadcast: 1849 ; CHECK: ## %bb.0: 1850 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 1851 ; CHECK-NEXT: retq 1852 %ps = load double, double* %ptr 1853 %pins = insertelement <8 x double> undef, double %ps, i32 0 1854 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 1855 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1856 ret <8 x double> %t 1857 } 1858 1859 define <16 x float> @trunc_v16f32_broadcast(float* %ptr) { 1860 ; CHECK-LABEL: trunc_v16f32_broadcast: 1861 ; CHECK: ## %bb.0: 1862 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 1863 ; CHECK-NEXT: retq 1864 %ps = load float, float* %ptr 1865 %pins = insertelement <16 x float> undef, float %ps, i32 0 1866 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 1867 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1868 ret <16 x float> %t 1869 } 1870 1871 define <2 x double> @trunc_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 1872 ; CHECK-LABEL: trunc_v2f64_mask_broadcast: 1873 ; CHECK: ## %bb.0: 1874 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 1875 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} 1876 ; CHECK-NEXT: retq 1877 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1878 %ps = load double, double* %ptr 1879 %pins = insertelement <2 x double> undef, double %ps, i32 0 1880 %p = shufflevector <2 x double> %pins, <2 x double> 
undef, <2 x i32> zeroinitializer 1881 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1882 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 1883 ret <2 x double> %s 1884 } 1885 1886 define <4 x float> @trunc_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 1887 ; CHECK-LABEL: trunc_v4f32_mask_broadcast: 1888 ; CHECK: ## %bb.0: 1889 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 1890 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} 1891 ; CHECK-NEXT: retq 1892 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1893 %ps = load float, float* %ptr 1894 %pins = insertelement <4 x float> undef, float %ps, i32 0 1895 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1896 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1897 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 1898 ret <4 x float> %s 1899 } 1900 1901 define <4 x double> @trunc_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 1902 ; CHECK-LABEL: trunc_v4f64_mask_broadcast: 1903 ; CHECK: ## %bb.0: 1904 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1905 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} 1906 ; CHECK-NEXT: retq 1907 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1908 %ps = load double, double* %ptr 1909 %pins = insertelement <4 x double> undef, double %ps, i32 0 1910 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 1911 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1912 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 1913 ret <4 x double> %s 1914 } 1915 1916 define <8 x float> @trunc_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 1917 ; CHECK-LABEL: trunc_v8f32_mask_broadcast: 1918 ; CHECK: ## %bb.0: 1919 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1920 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} 1921 ; CHECK-NEXT: retq 1922 %c = icmp eq <8 x i32> %cmp, 
zeroinitializer 1923 %ps = load float, float* %ptr 1924 %pins = insertelement <8 x float> undef, float %ps, i32 0 1925 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 1926 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1927 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 1928 ret <8 x float> %s 1929 } 1930 1931 define <8 x double> @trunc_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 1932 ; CHECK-LABEL: trunc_v8f64_mask_broadcast: 1933 ; CHECK: ## %bb.0: 1934 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1935 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} 1936 ; CHECK-NEXT: retq 1937 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1938 %ps = load double, double* %ptr 1939 %pins = insertelement <8 x double> undef, double %ps, i32 0 1940 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 1941 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1942 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 1943 ret <8 x double> %s 1944 } 1945 1946 define <16 x float> @trunc_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 1947 ; CHECK-LABEL: trunc_v16f32_mask_broadcast: 1948 ; CHECK: ## %bb.0: 1949 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1950 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} 1951 ; CHECK-NEXT: retq 1952 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1953 %ps = load float, float* %ptr 1954 %pins = insertelement <16 x float> undef, float %ps, i32 0 1955 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 1956 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1957 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 1958 ret <16 x float> %s 1959 } 1960 1961 define <2 x double> @trunc_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { 1962 ; CHECK-LABEL: trunc_v2f64_maskz_broadcast: 1963 ; CHECK: ## %bb.0: 1964 ; 
CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1965 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} {z} 1966 ; CHECK-NEXT: retq 1967 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1968 %ps = load double, double* %ptr 1969 %pins = insertelement <2 x double> undef, double %ps, i32 0 1970 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 1971 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1972 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 1973 ret <2 x double> %s 1974 } 1975 1976 define <4 x float> @trunc_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { 1977 ; CHECK-LABEL: trunc_v4f32_maskz_broadcast: 1978 ; CHECK: ## %bb.0: 1979 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 1980 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} {z} 1981 ; CHECK-NEXT: retq 1982 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1983 %ps = load float, float* %ptr 1984 %pins = insertelement <4 x float> undef, float %ps, i32 0 1985 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1986 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1987 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 1988 ret <4 x float> %s 1989 } 1990 1991 define <4 x double> @trunc_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) { 1992 ; CHECK-LABEL: trunc_v4f64_maskz_broadcast: 1993 ; CHECK: ## %bb.0: 1994 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1995 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} {z} 1996 ; CHECK-NEXT: retq 1997 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1998 %ps = load double, double* %ptr 1999 %pins = insertelement <4 x double> undef, double %ps, i32 0 2000 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 2001 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 2002 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 2003 ret <4 x double> %s 2004 } 2005 2006 define <8 x float> 
@trunc_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) { 2007 ; CHECK-LABEL: trunc_v8f32_maskz_broadcast: 2008 ; CHECK: ## %bb.0: 2009 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2010 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} {z} 2011 ; CHECK-NEXT: retq 2012 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2013 %ps = load float, float* %ptr 2014 %pins = insertelement <8 x float> undef, float %ps, i32 0 2015 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 2016 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 2017 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 2018 ret <8 x float> %s 2019 } 2020 2021 define <8 x double> @trunc_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) { 2022 ; CHECK-LABEL: trunc_v8f64_maskz_broadcast: 2023 ; CHECK: ## %bb.0: 2024 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 2025 ; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} {z} 2026 ; CHECK-NEXT: retq 2027 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2028 %ps = load double, double* %ptr 2029 %pins = insertelement <8 x double> undef, double %ps, i32 0 2030 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 2031 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 2032 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 2033 ret <8 x double> %s 2034 } 2035 2036 define <16 x float> @trunc_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) { 2037 ; CHECK-LABEL: trunc_v16f32_maskz_broadcast: 2038 ; CHECK: ## %bb.0: 2039 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2040 ; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} {z} 2041 ; CHECK-NEXT: retq 2042 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2043 %ps = load float, float* %ptr 2044 %pins = insertelement <16 x float> undef, float %ps, i32 0 2045 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 2046 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 2047 
%s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 2048 ret <16 x float> %s 2049 } 2050 2051 define <2 x double> @rint_v2f64(<2 x double> %p) { 2052 ; CHECK-LABEL: rint_v2f64: 2053 ; CHECK: ## %bb.0: 2054 ; CHECK-NEXT: vroundpd $4, %xmm0, %xmm0 2055 ; CHECK-NEXT: retq 2056 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2057 ret <2 x double> %t 2058 } 2059 2060 define <4 x float> @rint_v4f32(<4 x float> %p) { 2061 ; CHECK-LABEL: rint_v4f32: 2062 ; CHECK: ## %bb.0: 2063 ; CHECK-NEXT: vroundps $4, %xmm0, %xmm0 2064 ; CHECK-NEXT: retq 2065 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2066 ret <4 x float> %t 2067 } 2068 2069 define <4 x double> @rint_v4f64(<4 x double> %p){ 2070 ; CHECK-LABEL: rint_v4f64: 2071 ; CHECK: ## %bb.0: 2072 ; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0 2073 ; CHECK-NEXT: retq 2074 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2075 ret <4 x double> %t 2076 } 2077 2078 define <8 x float> @rint_v8f32(<8 x float> %p) { 2079 ; CHECK-LABEL: rint_v8f32: 2080 ; CHECK: ## %bb.0: 2081 ; CHECK-NEXT: vroundps $4, %ymm0, %ymm0 2082 ; CHECK-NEXT: retq 2083 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2084 ret <8 x float> %t 2085 } 2086 2087 define <8 x double> @rint_v8f64(<8 x double> %p){ 2088 ; CHECK-LABEL: rint_v8f64: 2089 ; CHECK: ## %bb.0: 2090 ; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0 2091 ; CHECK-NEXT: retq 2092 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2093 ret <8 x double> %t 2094 } 2095 2096 define <16 x float> @rint_v16f32(<16 x float> %p) { 2097 ; CHECK-LABEL: rint_v16f32: 2098 ; CHECK: ## %bb.0: 2099 ; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0 2100 ; CHECK-NEXT: retq 2101 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2102 ret <16 x float> %t 2103 } 2104 2105 define <2 x double> @rint_v2f64_load(<2 x double>* %ptr) { 2106 ; CHECK-LABEL: rint_v2f64_load: 2107 ; CHECK: ## %bb.0: 2108 ; CHECK-NEXT: vroundpd $4, (%rdi), %xmm0 2109 ; CHECK-NEXT: retq 2110 %p = load <2 
x double>, <2 x double>* %ptr 2111 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2112 ret <2 x double> %t 2113 } 2114 2115 define <4 x float> @rint_v4f32_load(<4 x float>* %ptr) { 2116 ; CHECK-LABEL: rint_v4f32_load: 2117 ; CHECK: ## %bb.0: 2118 ; CHECK-NEXT: vroundps $4, (%rdi), %xmm0 2119 ; CHECK-NEXT: retq 2120 %p = load <4 x float>, <4 x float>* %ptr 2121 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2122 ret <4 x float> %t 2123 } 2124 2125 define <4 x double> @rint_v4f64_load(<4 x double>* %ptr){ 2126 ; CHECK-LABEL: rint_v4f64_load: 2127 ; CHECK: ## %bb.0: 2128 ; CHECK-NEXT: vroundpd $4, (%rdi), %ymm0 2129 ; CHECK-NEXT: retq 2130 %p = load <4 x double>, <4 x double>* %ptr 2131 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2132 ret <4 x double> %t 2133 } 2134 2135 define <8 x float> @rint_v8f32_load(<8 x float>* %ptr) { 2136 ; CHECK-LABEL: rint_v8f32_load: 2137 ; CHECK: ## %bb.0: 2138 ; CHECK-NEXT: vroundps $4, (%rdi), %ymm0 2139 ; CHECK-NEXT: retq 2140 %p = load <8 x float>, <8 x float>* %ptr 2141 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2142 ret <8 x float> %t 2143 } 2144 2145 define <8 x double> @rint_v8f64_load(<8 x double>* %ptr){ 2146 ; CHECK-LABEL: rint_v8f64_load: 2147 ; CHECK: ## %bb.0: 2148 ; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 2149 ; CHECK-NEXT: retq 2150 %p = load <8 x double>, <8 x double>* %ptr 2151 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2152 ret <8 x double> %t 2153 } 2154 2155 define <16 x float> @rint_v16f32_load(<16 x float>* %ptr) { 2156 ; CHECK-LABEL: rint_v16f32_load: 2157 ; CHECK: ## %bb.0: 2158 ; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 2159 ; CHECK-NEXT: retq 2160 %p = load <16 x float>, <16 x float>* %ptr 2161 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2162 ret <16 x float> %t 2163 } 2164 2165 define <2 x double> @rint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { 2166 ; CHECK-LABEL: rint_v2f64_mask: 2167 ; CHECK: ## %bb.0: 2168 
; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 2169 ; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1} 2170 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 2171 ; CHECK-NEXT: retq 2172 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2173 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2174 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 2175 ret <2 x double> %s 2176 } 2177 2178 define <4 x float> @rint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { 2179 ; CHECK-LABEL: rint_v4f32_mask: 2180 ; CHECK: ## %bb.0: 2181 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2182 ; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm1 {%k1} 2183 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 2184 ; CHECK-NEXT: retq 2185 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2186 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2187 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 2188 ret <4 x float> %s 2189 } 2190 2191 define <4 x double> @rint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) { 2192 ; CHECK-LABEL: rint_v4f64_mask: 2193 ; CHECK: ## %bb.0: 2194 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 2195 ; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1} 2196 ; CHECK-NEXT: vmovapd %ymm1, %ymm0 2197 ; CHECK-NEXT: retq 2198 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2199 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2200 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 2201 ret <4 x double> %s 2202 } 2203 2204 define <8 x float> @rint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) { 2205 ; CHECK-LABEL: rint_v8f32_mask: 2206 ; CHECK: ## %bb.0: 2207 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2208 ; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm1 {%k1} 2209 ; CHECK-NEXT: vmovaps %ymm1, %ymm0 2210 ; CHECK-NEXT: retq 2211 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2212 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2213 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 2214 ret <8 x float> %s 2215 } 

; Merge-masked llvm.rint on 512-bit vectors, register source: the select
; against %passthru is expected to become a {%k1}-masked vrndscale $4 that
; writes the passthru register, followed by a move into the return register.
define <8 x double> @rint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @rint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; Zero-masked llvm.rint, register source: the mask comes from
; (icmp eq %cmp, 0) via vptestnm, and the select against zeroinitializer is
; expected to fold into a {%k1} {z} vrndscale with immediate $4.
define <2 x double> @rint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @rint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @rint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @rint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @rint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

define <16 x float> @rint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret <16 x float> %s
}

; Merge-masked llvm.rint with the source vector loaded from %ptr: the load is
; expected to fold into the vrndscale memory operand, and the masked result is
; written straight into the return register (no trailing move is expected).
define <2 x double> @rint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
  ret <2 x double> %s
}

define <4 x float> @rint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

define <4 x double> @rint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
  ret <4 x double> %s
}

define <8 x float> @rint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
  ret <8 x float> %s
}

define <8 x double> @rint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @rint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; Zero-masked llvm.rint with the source vector loaded from %ptr: both the load
; and the select against zeroinitializer are expected to fold into a single
; {%k1} {z} vrndscale $4 with a memory operand.
define <2 x double> @rint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @rint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @rint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @rint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @rint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

define <16 x float> @rint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret <16 x float> %s
}

; Unmasked llvm.rint of a splat built from a scalar load (insertelement into
; lane 0 + shufflevector with an all-zero mask): the splat is expected to fold
; into an embedded-broadcast memory operand ({1toN}) of vrndscale $4.
define <2 x double> @rint_v2f64_broadcast(double* %ptr) {
; CHECK-LABEL: rint_v2f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0
; CHECK-NEXT: retq
  %ps = load double, double* %ptr
  %pins = insertelement <2 x double> undef, double %ps, i32 0
  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @rint_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: rint_v4f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0
; CHECK-NEXT: retq
  %ps = load float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @rint_v4f64_broadcast(double* %ptr){
; CHECK-LABEL: rint_v4f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0
; CHECK-NEXT: retq
%ps = load double, double* %ptr 2501 %pins = insertelement <4 x double> undef, double %ps, i32 0 2502 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 2503 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2504 ret <4 x double> %t 2505 } 2506 2507 define <8 x float> @rint_v8f32_broadcast(float* %ptr) { 2508 ; CHECK-LABEL: rint_v8f32_broadcast: 2509 ; CHECK: ## %bb.0: 2510 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 2511 ; CHECK-NEXT: retq 2512 %ps = load float, float* %ptr 2513 %pins = insertelement <8 x float> undef, float %ps, i32 0 2514 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 2515 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2516 ret <8 x float> %t 2517 } 2518 2519 define <8 x double> @rint_v8f64_broadcast(double* %ptr){ 2520 ; CHECK-LABEL: rint_v8f64_broadcast: 2521 ; CHECK: ## %bb.0: 2522 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 2523 ; CHECK-NEXT: retq 2524 %ps = load double, double* %ptr 2525 %pins = insertelement <8 x double> undef, double %ps, i32 0 2526 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 2527 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2528 ret <8 x double> %t 2529 } 2530 2531 define <16 x float> @rint_v16f32_broadcast(float* %ptr) { 2532 ; CHECK-LABEL: rint_v16f32_broadcast: 2533 ; CHECK: ## %bb.0: 2534 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 2535 ; CHECK-NEXT: retq 2536 %ps = load float, float* %ptr 2537 %pins = insertelement <16 x float> undef, float %ps, i32 0 2538 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 2539 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2540 ret <16 x float> %t 2541 } 2542 2543 define <2 x double> @rint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 2544 ; CHECK-LABEL: rint_v2f64_mask_broadcast: 2545 ; CHECK: ## %bb.0: 2546 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 
2547 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} 2548 ; CHECK-NEXT: retq 2549 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2550 %ps = load double, double* %ptr 2551 %pins = insertelement <2 x double> undef, double %ps, i32 0 2552 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 2553 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2554 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 2555 ret <2 x double> %s 2556 } 2557 2558 define <4 x float> @rint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 2559 ; CHECK-LABEL: rint_v4f32_mask_broadcast: 2560 ; CHECK: ## %bb.0: 2561 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2562 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} 2563 ; CHECK-NEXT: retq 2564 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2565 %ps = load float, float* %ptr 2566 %pins = insertelement <4 x float> undef, float %ps, i32 0 2567 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 2568 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2569 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 2570 ret <4 x float> %s 2571 } 2572 2573 define <4 x double> @rint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 2574 ; CHECK-LABEL: rint_v4f64_mask_broadcast: 2575 ; CHECK: ## %bb.0: 2576 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 2577 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} 2578 ; CHECK-NEXT: retq 2579 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2580 %ps = load double, double* %ptr 2581 %pins = insertelement <4 x double> undef, double %ps, i32 0 2582 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 2583 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2584 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 2585 ret <4 x double> %s 2586 } 2587 2588 define <8 x float> @rint_v8f32_mask_broadcast(float* %ptr, <8 x 
float> %passthru, <8 x i32> %cmp) { 2589 ; CHECK-LABEL: rint_v8f32_mask_broadcast: 2590 ; CHECK: ## %bb.0: 2591 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2592 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} 2593 ; CHECK-NEXT: retq 2594 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2595 %ps = load float, float* %ptr 2596 %pins = insertelement <8 x float> undef, float %ps, i32 0 2597 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 2598 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2599 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 2600 ret <8 x float> %s 2601 } 2602 2603 define <8 x double> @rint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 2604 ; CHECK-LABEL: rint_v8f64_mask_broadcast: 2605 ; CHECK: ## %bb.0: 2606 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 2607 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} 2608 ; CHECK-NEXT: retq 2609 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2610 %ps = load double, double* %ptr 2611 %pins = insertelement <8 x double> undef, double %ps, i32 0 2612 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 2613 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2614 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 2615 ret <8 x double> %s 2616 } 2617 2618 define <16 x float> @rint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 2619 ; CHECK-LABEL: rint_v16f32_mask_broadcast: 2620 ; CHECK: ## %bb.0: 2621 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2622 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} 2623 ; CHECK-NEXT: retq 2624 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2625 %ps = load float, float* %ptr 2626 %pins = insertelement <16 x float> undef, float %ps, i32 0 2627 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 2628 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2629 %s = select <16 
x i1> %c, <16 x float> %t, <16 x float> %passthru 2630 ret <16 x float> %s 2631 } 2632 2633 define <2 x double> @rint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { 2634 ; CHECK-LABEL: rint_v2f64_maskz_broadcast: 2635 ; CHECK: ## %bb.0: 2636 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 2637 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} {z} 2638 ; CHECK-NEXT: retq 2639 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2640 %ps = load double, double* %ptr 2641 %pins = insertelement <2 x double> undef, double %ps, i32 0 2642 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 2643 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2644 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 2645 ret <2 x double> %s 2646 } 2647 2648 define <4 x float> @rint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { 2649 ; CHECK-LABEL: rint_v4f32_maskz_broadcast: 2650 ; CHECK: ## %bb.0: 2651 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2652 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} {z} 2653 ; CHECK-NEXT: retq 2654 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2655 %ps = load float, float* %ptr 2656 %pins = insertelement <4 x float> undef, float %ps, i32 0 2657 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 2658 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2659 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 2660 ret <4 x float> %s 2661 } 2662 2663 define <4 x double> @rint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) { 2664 ; CHECK-LABEL: rint_v4f64_maskz_broadcast: 2665 ; CHECK: ## %bb.0: 2666 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 2667 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} {z} 2668 ; CHECK-NEXT: retq 2669 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2670 %ps = load double, double* %ptr 2671 %pins = insertelement <4 x double> undef, double %ps, i32 0 2672 %p = shufflevector <4 x double> %pins, <4 x double> 
undef, <4 x i32> zeroinitializer 2673 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2674 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 2675 ret <4 x double> %s 2676 } 2677 2678 define <8 x float> @rint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) { 2679 ; CHECK-LABEL: rint_v8f32_maskz_broadcast: 2680 ; CHECK: ## %bb.0: 2681 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 2682 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} {z} 2683 ; CHECK-NEXT: retq 2684 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2685 %ps = load float, float* %ptr 2686 %pins = insertelement <8 x float> undef, float %ps, i32 0 2687 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 2688 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2689 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 2690 ret <8 x float> %s 2691 } 2692 2693 define <8 x double> @rint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) { 2694 ; CHECK-LABEL: rint_v8f64_maskz_broadcast: 2695 ; CHECK: ## %bb.0: 2696 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 2697 ; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} {z} 2698 ; CHECK-NEXT: retq 2699 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2700 %ps = load double, double* %ptr 2701 %pins = insertelement <8 x double> undef, double %ps, i32 0 2702 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 2703 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2704 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 2705 ret <8 x double> %s 2706 } 2707 2708 define <16 x float> @rint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) { 2709 ; CHECK-LABEL: rint_v16f32_maskz_broadcast: 2710 ; CHECK: ## %bb.0: 2711 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 2712 ; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} {z} 2713 ; CHECK-NEXT: retq 2714 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2715 %ps = load float, float* %ptr 
2716 %pins = insertelement <16 x float> undef, float %ps, i32 0 2717 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 2718 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2719 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 2720 ret <16 x float> %s 2721 } 2722 2723 define <2 x double> @nearbyint_v2f64(<2 x double> %p) { 2724 ; CHECK-LABEL: nearbyint_v2f64: 2725 ; CHECK: ## %bb.0: 2726 ; CHECK-NEXT: vroundpd $12, %xmm0, %xmm0 2727 ; CHECK-NEXT: retq 2728 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 2729 ret <2 x double> %t 2730 } 2731 2732 define <4 x float> @nearbyint_v4f32(<4 x float> %p) { 2733 ; CHECK-LABEL: nearbyint_v4f32: 2734 ; CHECK: ## %bb.0: 2735 ; CHECK-NEXT: vroundps $12, %xmm0, %xmm0 2736 ; CHECK-NEXT: retq 2737 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 2738 ret <4 x float> %t 2739 } 2740 2741 define <4 x double> @nearbyint_v4f64(<4 x double> %p){ 2742 ; CHECK-LABEL: nearbyint_v4f64: 2743 ; CHECK: ## %bb.0: 2744 ; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0 2745 ; CHECK-NEXT: retq 2746 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 2747 ret <4 x double> %t 2748 } 2749 2750 define <8 x float> @nearbyint_v8f32(<8 x float> %p) { 2751 ; CHECK-LABEL: nearbyint_v8f32: 2752 ; CHECK: ## %bb.0: 2753 ; CHECK-NEXT: vroundps $12, %ymm0, %ymm0 2754 ; CHECK-NEXT: retq 2755 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 2756 ret <8 x float> %t 2757 } 2758 2759 define <8 x double> @nearbyint_v8f64(<8 x double> %p){ 2760 ; CHECK-LABEL: nearbyint_v8f64: 2761 ; CHECK: ## %bb.0: 2762 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0 2763 ; CHECK-NEXT: retq 2764 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 2765 ret <8 x double> %t 2766 } 2767 2768 define <16 x float> @nearbyint_v16f32(<16 x float> %p) { 2769 ; CHECK-LABEL: nearbyint_v16f32: 2770 ; CHECK: ## %bb.0: 2771 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0 2772 ; CHECK-NEXT: retq 
2773 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 2774 ret <16 x float> %t 2775 } 2776 2777 define <2 x double> @nearbyint_v2f64_load(<2 x double>* %ptr) { 2778 ; CHECK-LABEL: nearbyint_v2f64_load: 2779 ; CHECK: ## %bb.0: 2780 ; CHECK-NEXT: vroundpd $12, (%rdi), %xmm0 2781 ; CHECK-NEXT: retq 2782 %p = load <2 x double>, <2 x double>* %ptr 2783 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 2784 ret <2 x double> %t 2785 } 2786 2787 define <4 x float> @nearbyint_v4f32_load(<4 x float>* %ptr) { 2788 ; CHECK-LABEL: nearbyint_v4f32_load: 2789 ; CHECK: ## %bb.0: 2790 ; CHECK-NEXT: vroundps $12, (%rdi), %xmm0 2791 ; CHECK-NEXT: retq 2792 %p = load <4 x float>, <4 x float>* %ptr 2793 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 2794 ret <4 x float> %t 2795 } 2796 2797 define <4 x double> @nearbyint_v4f64_load(<4 x double>* %ptr){ 2798 ; CHECK-LABEL: nearbyint_v4f64_load: 2799 ; CHECK: ## %bb.0: 2800 ; CHECK-NEXT: vroundpd $12, (%rdi), %ymm0 2801 ; CHECK-NEXT: retq 2802 %p = load <4 x double>, <4 x double>* %ptr 2803 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 2804 ret <4 x double> %t 2805 } 2806 2807 define <8 x float> @nearbyint_v8f32_load(<8 x float>* %ptr) { 2808 ; CHECK-LABEL: nearbyint_v8f32_load: 2809 ; CHECK: ## %bb.0: 2810 ; CHECK-NEXT: vroundps $12, (%rdi), %ymm0 2811 ; CHECK-NEXT: retq 2812 %p = load <8 x float>, <8 x float>* %ptr 2813 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 2814 ret <8 x float> %t 2815 } 2816 2817 define <8 x double> @nearbyint_v8f64_load(<8 x double>* %ptr){ 2818 ; CHECK-LABEL: nearbyint_v8f64_load: 2819 ; CHECK: ## %bb.0: 2820 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 2821 ; CHECK-NEXT: retq 2822 %p = load <8 x double>, <8 x double>* %ptr 2823 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 2824 ret <8 x double> %t 2825 } 2826 2827 define <16 x float> @nearbyint_v16f32_load(<16 x float>* %ptr) { 2828 ; CHECK-LABEL: nearbyint_v16f32_load: 2829 
; CHECK: ## %bb.0: 2830 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 2831 ; CHECK-NEXT: retq 2832 %p = load <16 x float>, <16 x float>* %ptr 2833 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 2834 ret <16 x float> %t 2835 } 2836 2837 define <2 x double> @nearbyint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { 2838 ; CHECK-LABEL: nearbyint_v2f64_mask: 2839 ; CHECK: ## %bb.0: 2840 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 2841 ; CHECK-NEXT: vrndscalepd $12, %xmm0, %xmm1 {%k1} 2842 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 2843 ; CHECK-NEXT: retq 2844 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2845 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 2846 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 2847 ret <2 x double> %s 2848 } 2849 2850 define <4 x float> @nearbyint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { 2851 ; CHECK-LABEL: nearbyint_v4f32_mask: 2852 ; CHECK: ## %bb.0: 2853 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 2854 ; CHECK-NEXT: vrndscaleps $12, %xmm0, %xmm1 {%k1} 2855 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 2856 ; CHECK-NEXT: retq 2857 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2858 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 2859 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 2860 ret <4 x float> %s 2861 } 2862 2863 define <4 x double> @nearbyint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) { 2864 ; CHECK-LABEL: nearbyint_v4f64_mask: 2865 ; CHECK: ## %bb.0: 2866 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 2867 ; CHECK-NEXT: vrndscalepd $12, %ymm0, %ymm1 {%k1} 2868 ; CHECK-NEXT: vmovapd %ymm1, %ymm0 2869 ; CHECK-NEXT: retq 2870 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2871 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 2872 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 2873 ret <4 x double> %s 2874 } 2875 2876 define <8 x float> @nearbyint_v8f32_mask(<8 x float> %p, <8 x float> 
%passthru, <8 x i32> %cmp) { 2877 ; CHECK-LABEL: nearbyint_v8f32_mask: 2878 ; CHECK: ## %bb.0: 2879 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 2880 ; CHECK-NEXT: vrndscaleps $12, %ymm0, %ymm1 {%k1} 2881 ; CHECK-NEXT: vmovaps %ymm1, %ymm0 2882 ; CHECK-NEXT: retq 2883 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2884 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 2885 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 2886 ret <8 x float> %s 2887 } 2888 2889 define <8 x double> @nearbyint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) { 2890 ; CHECK-LABEL: nearbyint_v8f64_mask: 2891 ; CHECK: ## %bb.0: 2892 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 2893 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm1 {%k1} 2894 ; CHECK-NEXT: vmovapd %zmm1, %zmm0 2895 ; CHECK-NEXT: retq 2896 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2897 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 2898 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 2899 ret <8 x double> %s 2900 } 2901 2902 define <16 x float> @nearbyint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) { 2903 ; CHECK-LABEL: nearbyint_v16f32_mask: 2904 ; CHECK: ## %bb.0: 2905 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 2906 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm1 {%k1} 2907 ; CHECK-NEXT: vmovaps %zmm1, %zmm0 2908 ; CHECK-NEXT: retq 2909 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2910 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 2911 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 2912 ret <16 x float> %s 2913 } 2914 2915 define <2 x double> @nearbyint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) { 2916 ; CHECK-LABEL: nearbyint_v2f64_maskz: 2917 ; CHECK: ## %bb.0: 2918 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 2919 ; CHECK-NEXT: vrndscalepd $12, %xmm0, %xmm0 {%k1} {z} 2920 ; CHECK-NEXT: retq 2921 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2922 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x 
double> %p) 2923 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 2924 ret <2 x double> %s 2925 } 2926 2927 define <4 x float> @nearbyint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) { 2928 ; CHECK-LABEL: nearbyint_v4f32_maskz: 2929 ; CHECK: ## %bb.0: 2930 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 2931 ; CHECK-NEXT: vrndscaleps $12, %xmm0, %xmm0 {%k1} {z} 2932 ; CHECK-NEXT: retq 2933 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2934 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 2935 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 2936 ret <4 x float> %s 2937 } 2938 2939 define <4 x double> @nearbyint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) { 2940 ; CHECK-LABEL: nearbyint_v4f64_maskz: 2941 ; CHECK: ## %bb.0: 2942 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 2943 ; CHECK-NEXT: vrndscalepd $12, %ymm0, %ymm0 {%k1} {z} 2944 ; CHECK-NEXT: retq 2945 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2946 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 2947 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 2948 ret <4 x double> %s 2949 } 2950 2951 define <8 x float> @nearbyint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) { 2952 ; CHECK-LABEL: nearbyint_v8f32_maskz: 2953 ; CHECK: ## %bb.0: 2954 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2955 ; CHECK-NEXT: vrndscaleps $12, %ymm0, %ymm0 {%k1} {z} 2956 ; CHECK-NEXT: retq 2957 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2958 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 2959 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 2960 ret <8 x float> %s 2961 } 2962 2963 define <8 x double> @nearbyint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) { 2964 ; CHECK-LABEL: nearbyint_v8f64_maskz: 2965 ; CHECK: ## %bb.0: 2966 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 2967 ; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0 {%k1} {z} 2968 ; CHECK-NEXT: retq 2969 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2970 %t = call <8 x double> 
@llvm.nearbyint.v8f64(<8 x double> %p) 2971 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 2972 ret <8 x double> %s 2973 } 2974 2975 define <16 x float> @nearbyint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) { 2976 ; CHECK-LABEL: nearbyint_v16f32_maskz: 2977 ; CHECK: ## %bb.0: 2978 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2979 ; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0 {%k1} {z} 2980 ; CHECK-NEXT: retq 2981 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2982 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 2983 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 2984 ret <16 x float> %s 2985 } 2986 2987 define <2 x double> @nearbyint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 2988 ; CHECK-LABEL: nearbyint_v2f64_mask_load: 2989 ; CHECK: ## %bb.0: 2990 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 2991 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1} 2992 ; CHECK-NEXT: retq 2993 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2994 %p = load <2 x double>, <2 x double>* %ptr 2995 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 2996 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 2997 ret <2 x double> %s 2998 } 2999 3000 define <4 x float> @nearbyint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 3001 ; CHECK-LABEL: nearbyint_v4f32_mask_load: 3002 ; CHECK: ## %bb.0: 3003 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 3004 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1} 3005 ; CHECK-NEXT: retq 3006 %c = icmp eq <4 x i32> %cmp, zeroinitializer 3007 %p = load <4 x float>, <4 x float>* %ptr 3008 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 3009 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 3010 ret <4 x float> %s 3011 } 3012 3013 define <4 x double> @nearbyint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 3014 ; CHECK-LABEL: nearbyint_v4f64_mask_load: 3015 ; 
CHECK: ## %bb.0: 3016 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 3017 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1} 3018 ; CHECK-NEXT: retq 3019 %c = icmp eq <4 x i64> %cmp, zeroinitializer 3020 %p = load <4 x double>, <4 x double>* %ptr 3021 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 3022 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 3023 ret <4 x double> %s 3024 } 3025 3026 define <8 x float> @nearbyint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 3027 ; CHECK-LABEL: nearbyint_v8f32_mask_load: 3028 ; CHECK: ## %bb.0: 3029 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 3030 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1} 3031 ; CHECK-NEXT: retq 3032 %c = icmp eq <8 x i32> %cmp, zeroinitializer 3033 %p = load <8 x float>, <8 x float>* %ptr 3034 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 3035 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 3036 ret <8 x float> %s 3037 } 3038 3039 define <8 x double> @nearbyint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 3040 ; CHECK-LABEL: nearbyint_v8f64_mask_load: 3041 ; CHECK: ## %bb.0: 3042 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 3043 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1} 3044 ; CHECK-NEXT: retq 3045 %c = icmp eq <8 x i64> %cmp, zeroinitializer 3046 %p = load <8 x double>, <8 x double>* %ptr 3047 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 3048 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 3049 ret <8 x double> %s 3050 } 3051 3052 define <16 x float> @nearbyint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 3053 ; CHECK-LABEL: nearbyint_v16f32_mask_load: 3054 ; CHECK: ## %bb.0: 3055 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 3056 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1} 3057 ; CHECK-NEXT: retq 3058 %c = icmp eq <16 x i32> %cmp, zeroinitializer 3059 %p = load <16 x float>, <16 x float>* %ptr 3060 %t = 
call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 3061 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 3062 ret <16 x float> %s 3063 } 3064 3065 define <2 x double> @nearbyint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { 3066 ; CHECK-LABEL: nearbyint_v2f64_maskz_load: 3067 ; CHECK: ## %bb.0: 3068 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 3069 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1} {z} 3070 ; CHECK-NEXT: retq 3071 %c = icmp eq <2 x i64> %cmp, zeroinitializer 3072 %p = load <2 x double>, <2 x double>* %ptr 3073 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 3074 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 3075 ret <2 x double> %s 3076 } 3077 3078 define <4 x float> @nearbyint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { 3079 ; CHECK-LABEL: nearbyint_v4f32_maskz_load: 3080 ; CHECK: ## %bb.0: 3081 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 3082 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1} {z} 3083 ; CHECK-NEXT: retq 3084 %c = icmp eq <4 x i32> %cmp, zeroinitializer 3085 %p = load <4 x float>, <4 x float>* %ptr 3086 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 3087 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 3088 ret <4 x float> %s 3089 } 3090 3091 define <4 x double> @nearbyint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { 3092 ; CHECK-LABEL: nearbyint_v4f64_maskz_load: 3093 ; CHECK: ## %bb.0: 3094 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 3095 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1} {z} 3096 ; CHECK-NEXT: retq 3097 %c = icmp eq <4 x i64> %cmp, zeroinitializer 3098 %p = load <4 x double>, <4 x double>* %ptr 3099 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 3100 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 3101 ret <4 x double> %s 3102 } 3103 3104 define <8 x float> @nearbyint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { 3105 ; CHECK-LABEL: 
nearbyint_v8f32_maskz_load: 3106 ; CHECK: ## %bb.0: 3107 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 3108 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1} {z} 3109 ; CHECK-NEXT: retq 3110 %c = icmp eq <8 x i32> %cmp, zeroinitializer 3111 %p = load <8 x float>, <8 x float>* %ptr 3112 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 3113 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 3114 ret <8 x float> %s 3115 } 3116 3117 define <8 x double> @nearbyint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { 3118 ; CHECK-LABEL: nearbyint_v8f64_maskz_load: 3119 ; CHECK: ## %bb.0: 3120 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 3121 ; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1} {z} 3122 ; CHECK-NEXT: retq 3123 %c = icmp eq <8 x i64> %cmp, zeroinitializer 3124 %p = load <8 x double>, <8 x double>* %ptr 3125 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 3126 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 3127 ret <8 x double> %s 3128 } 3129 3130 define <16 x float> @nearbyint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { 3131 ; CHECK-LABEL: nearbyint_v16f32_maskz_load: 3132 ; CHECK: ## %bb.0: 3133 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 3134 ; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1} {z} 3135 ; CHECK-NEXT: retq 3136 %c = icmp eq <16 x i32> %cmp, zeroinitializer 3137 %p = load <16 x float>, <16 x float>* %ptr 3138 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 3139 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 3140 ret <16 x float> %s 3141 } 3142 3143 define <2 x double> @nearbyint_v2f64_broadcast(double* %ptr) { 3144 ; CHECK-LABEL: nearbyint_v2f64_broadcast: 3145 ; CHECK: ## %bb.0: 3146 ; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 3147 ; CHECK-NEXT: retq 3148 %ps = load double, double* %ptr 3149 %pins = insertelement <2 x double> undef, double %ps, i32 0 3150 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> 
zeroinitializer
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

; Unmasked nearbyint of a float splatted to 4 lanes. The expected assembly
; folds the scalar load into a {1to4} memory operand.
define <4 x float> @nearbyint_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: nearbyint_v4f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0
; CHECK-NEXT: retq
  %scalar = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %splat)
  ret <4 x float> %rounded
}

; Unmasked nearbyint of a double splatted to 4 lanes. The expected assembly
; folds the scalar load into a {1to4} memory operand.
define <4 x double> @nearbyint_v4f64_broadcast(double* %ptr) {
; CHECK-LABEL: nearbyint_v4f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0
; CHECK-NEXT: retq
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %splat)
  ret <4 x double> %rounded
}

; Unmasked nearbyint of a float splatted to 8 lanes. The expected assembly
; folds the scalar load into a {1to8} memory operand.
define <8 x float> @nearbyint_v8f32_broadcast(float* %ptr) {
; CHECK-LABEL: nearbyint_v8f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0
; CHECK-NEXT: retq
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %splat)
  ret <8 x float> %rounded
}

; Unmasked nearbyint of a double splatted to 8 lanes. The expected assembly
; folds the scalar load into a {1to8} memory operand.
define <8 x double> @nearbyint_v8f64_broadcast(double* %ptr) {
; CHECK-LABEL: nearbyint_v8f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0
; CHECK-NEXT: retq
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %splat)
  ret <8 x double> %rounded
}

; Unmasked nearbyint of a float splatted to 16 lanes. The expected assembly
; folds the scalar load into a {1to16} memory operand.
define <16 x float> @nearbyint_v16f32_broadcast(float* %ptr) {
; CHECK-LABEL: nearbyint_v16f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0
; CHECK-NEXT: retq
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %splat)
  ret <16 x float> %rounded
}

; Merge form, 2 x double: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <2 x double> @nearbyint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v2f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <2 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %splat)
  %merged = select <2 x i1> %is_zero, <2 x double> %rounded, <2 x double> %passthru
  ret <2 x double> %merged
}

; Merge form, 4 x float: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <4 x float> @nearbyint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v4f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %splat)
  %merged = select <4 x i1> %is_zero, <4 x float> %rounded, <4 x float> %passthru
  ret <4 x float> %merged
}

; Merge form, 4 x double: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <4 x double> @nearbyint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %splat)
  %merged = select <4 x i1> %is_zero, <4 x double> %rounded, <4 x double> %passthru
  ret <4 x double> %merged
}

; Merge form, 8 x float: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <8 x float> @nearbyint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %splat)
  %merged = select <8 x i1> %is_zero, <8 x float> %rounded, <8 x float> %passthru
  ret <8 x float> %merged
}

; Merge form, 8 x double: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <8 x double> @nearbyint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v8f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %splat)
  %merged = select <8 x i1> %is_zero, <8 x double> %rounded, <8 x double> %passthru
  ret <8 x double> %merged
}

; Merge form, 16 x float: lanes where %cmp is zero receive the rounded splat,
; all other lanes keep %passthru. The mask is expected in %k1.
define <16 x float> @nearbyint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v16f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <16 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %splat)
  %merged = select <16 x i1> %is_zero, <16 x float> %rounded, <16 x float> %passthru
  ret <16 x float> %merged
}

; Zeroing form, 2 x double: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <2 x double> @nearbyint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v2f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <2 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %splat)
  %zeroed = select <2 x i1> %is_zero, <2 x double> %rounded, <2 x double> zeroinitializer
  ret <2 x double> %zeroed
}

; Zeroing form, 4 x float: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <4 x float> @nearbyint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v4f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %splat)
  %zeroed = select <4 x i1> %is_zero, <4 x float> %rounded, <4 x float> zeroinitializer
  ret <4 x float> %zeroed
}

; Zeroing form, 4 x double: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <4 x double> @nearbyint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %splat)
  %zeroed = select <4 x i1> %is_zero, <4 x double> %rounded, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}

; Zeroing form, 8 x float: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <8 x float> @nearbyint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %splat)
  %zeroed = select <8 x i1> %is_zero, <8 x float> %rounded, <8 x float> zeroinitializer
  ret <8 x float> %zeroed
}

; Zeroing form, 8 x double: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <8 x double> @nearbyint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v8f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %splat)
  %zeroed = select <8 x i1> %is_zero, <8 x double> %rounded, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}

; Zeroing form, 16 x float: lanes where %cmp is zero receive the rounded
; splat, all other lanes are zero. Expected as {%k1} {z} on the instruction.
define <16 x float> @nearbyint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v16f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <16 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %splat)
  %zeroed = select <16 x i1> %is_zero, <16 x float> %rounded, <16 x float> zeroinitializer
  ret <16 x float> %zeroed
}