; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX

define <2 x double> @floor_v2f64(<2 x double> %p) {
; SSE41-LABEL: floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)

define <4 x float> @floor_v4f32(<4 x float> %p) {
; SSE41-LABEL: floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)

define <4 x double> @floor_v4f64(<4 x double> %p) {
; SSE41-LABEL: floor_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)

define <8 x float> @floor_v8f32(<8 x float> %p) {
; SSE41-LABEL: floor_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
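
; The floor, ceil and trunc tests above and below expect round{ps,pd}
; immediates 9, 10 and 11 (binary 1001, 1010 and 1011): bits[1:0] select
; round-down, round-up and round-toward-zero respectively, and bit 3 masks
; the inexact (precision) exception, matching the semantics of the
; corresponding LLVM intrinsics.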
define <2 x double> @ceil_v2f64(<2 x double> %p) {
; SSE41-LABEL: ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; SSE41-LABEL: ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; SSE41-LABEL: ceil_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; SSE41-LABEL: ceil_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)

define <2 x double> @trunc_v2f64(<2 x double> %p) {
; SSE41-LABEL: trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; SSE41-LABEL: trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)

define <4 x double> @trunc_v4f64(<4 x double> %p) {
; SSE41-LABEL: trunc_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)

define <8 x float> @trunc_v8f32(<8 x float> %p) {
; SSE41-LABEL: trunc_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
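
; rint and nearbyint both defer to the current MXCSR rounding mode via bit 2
; of the immediate: rint uses 4 (binary 0100) and leaves the inexact
; exception unmasked, while nearbyint uses 12 (binary 1100), which also sets
; bit 3 to suppress it.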
define <2 x double> @rint_v2f64(<2 x double> %p) {
; SSE41-LABEL: rint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)

define <4 x float> @rint_v4f32(<4 x float> %p) {
; SSE41-LABEL: rint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)

define <4 x double> @rint_v4f64(<4 x double> %p) {
; SSE41-LABEL: rint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)

define <8 x float> @rint_v8f32(<8 x float> %p) {
; SSE41-LABEL: rint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)

define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; SSE41-LABEL: nearbyint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)

define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; SSE41-LABEL: nearbyint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)

define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; SSE41-LABEL: nearbyint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)

define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; SSE41-LABEL: nearbyint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)

;
; Constant Folding
;
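
; These calls have constant operands, so the rounding should be evaluated at
; compile time: no round instruction is emitted, only a load of the already
; rounded vector constant.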
define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}