; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s

define <2 x double> @constrained_vector_fdiv_v2f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; NO-FMA-NEXT:    divpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %div
}

define <4 x double> @constrained_vector_fdiv_v4f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
; NO-FMA:       # %bb.0:
; NO-FMA-NEXT:    movapd {{.*#+}} xmm2 = [1.000000e+01,1.000000e+01]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
; NO-FMA-NEXT:    divpd %xmm2, %xmm0
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [3.000000e+00,4.000000e+00]
; NO-FMA-NEXT:    divpd %xmm2, %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
; HAS-FMA:       # %bb.0:
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %div
}

define <2 x double> @constrained_vector_fmul_v2f64() {
; NO-FMA-LABEL: constrained_vector_fmul_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %mul
}

define <4 x double> @constrained_vector_fmul_v4f64() {
; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [2.000000e+00,3.000000e+00]
; NO-FMA-NEXT:    mulpd %xmm1, %xmm0
; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fmul_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 2.000000e+00, double 3.000000e+00,
                         double 4.000000e+00, double 5.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %mul
}

define <2 x double> @constrained_vector_fadd_v2f64() {
; NO-FMA-LABEL: constrained_vector_fadd_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %add
}

define <4 x double> @constrained_vector_fadd_v4f64() {
; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,1.000000e-01]
; NO-FMA-NEXT:    addpd %xmm1, %xmm0
; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fadd_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %add
}

define <2 x double> @constrained_vector_fsub_v2f64() {
; NO-FMA-LABEL: constrained_vector_fsub_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %sub
}

define <4 x double> @constrained_vector_fsub_v4f64() {
; NO-FMA-LABEL: constrained_vector_fsub_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
; NO-FMA-NEXT:    movapd %xmm1, %xmm0
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fsub_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [-1.797693e+308,-1.797693e+308,-1.797693e+308,-1.797693e+308]
; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %sub
}

define <2 x double> @constrained_vector_fma_v2f64() {
; NO-FMA-LABEL: constrained_vector_fma_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [3.500000e+00,2.500000e+00]
; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
           <2 x double> <double 1.5, double 0.5>,
           <2 x double> <double 3.5, double 2.5>,
           <2 x double> <double 5.5, double 4.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %fma
}

define <4 x double> @constrained_vector_fma_v4f64() {
; NO-FMA-LABEL: constrained_vector_fma_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; NO-FMA-NEXT:    callq fma
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <4 x double> @llvm.experimental.constrained.fma.v4f64(
           <4 x double> <double 3.5, double 2.5, double 1.5, double 0.5>,
           <4 x double> <double 7.5, double 6.5, double 5.5, double 4.5>,
           <4 x double> <double 11.5, double 10.5, double 9.5, double 8.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %fma
}

define <4 x float> @constrained_vector_fma_v4f32() {
; NO-FMA-LABEL: constrained_vector_fma_v4f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
           <4 x float> <float 3.5, float 2.5, float 1.5, float 0.5>,
           <4 x float> <float 7.5, float 6.5, float 5.5, float 4.5>,
           <4 x float> <float 11.5, float 10.5, float 9.5, float 8.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x float> %fma
}

define <8 x float> @constrained_vector_fma_v8f32() {
; NO-FMA-LABEL: constrained_vector_fma_v8f32:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $56, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 64
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NO-FMA-NEXT:    callq fmaf
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $56, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vmovaps {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01,7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
; HAS-FMA-NEXT:    vmovaps {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00,1.150000e+01,1.050000e+01,9.500000e+00,8.500000e+00]
; HAS-FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
; HAS-FMA-NEXT:    retq
entry:
  %fma = call <8 x float> @llvm.experimental.constrained.fma.v8f32(
           <8 x float> <float 3.5, float 2.5, float 1.5, float 0.5,
                        float 7.5, float 6.5, float 5.5, float 4.5>,
           <8 x float> <float 7.5, float 6.5, float 5.5, float 4.5,
                        float 11.5, float 10.5, float 9.5, float 8.5>,
           <8 x float> <float 11.5, float 10.5, float 9.5, float 8.5,
                        float 15.5, float 14.5, float 13.5, float 12.5>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <8 x float> %fma
}

define <2 x double> @constrained_vector_sqrt_v2f64() {
; NO-FMA-LABEL: constrained_vector_sqrt_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_sqrt_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vsqrtpd {{.*}}(%rip), %xmm0
; HAS-FMA-NEXT:    retq
entry:
  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
            <2 x double> <double 42.0, double 42.1>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict")
  ret <2 x double> %sqrt
}

define <4 x double> @constrained_vector_sqrt_v4f64() {
; NO-FMA-LABEL: constrained_vector_sqrt_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_sqrt_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    vsqrtpd {{.*}}(%rip), %ymm0
; HAS-FMA-NEXT:    retq
entry:
  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
            <4 x double> <double 42.0, double 42.1,
                          double 42.2, double 42.3>,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict")
  ret <4 x double> %sqrt
}

define <2 x double> @constrained_vector_pow_v2f64() {
; NO-FMA-LABEL: constrained_vector_pow_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_pow_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    addq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
           <2 x double> <double 42.1, double 42.2>,
           <2 x double> <double 3.0, double 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %pow
}

define <4 x double> @constrained_vector_pow_v4f64() {
; NO-FMA-LABEL: constrained_vector_pow_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT:    callq pow
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_pow_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT:    callq pow
; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    addq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
           <4 x double> <double 42.1, double 42.2,
                         double 42.3, double 42.4>,
           <4 x double> <double 3.0, double 3.0,
                         double 3.0, double 3.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %pow
}

define <2 x double> @constrained_vector_powi_v2f64() {
; NO-FMA-LABEL: constrained_vector_powi_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_powi_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    addq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
            <2 x double> <double 42.1, double 42.2>,
            i32 3,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict")
  ret <2 x double> %powi
}

define <4 x double> @constrained_vector_powi_v4f64() {
; NO-FMA-LABEL: constrained_vector_powi_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    movl $3, %edi
; NO-FMA-NEXT:    callq __powidf2
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_powi_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    movl $3, %edi
; HAS-FMA-NEXT:    callq __powidf2
; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    addq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
            <4 x double> <double 42.1, double 42.2,
                          double 42.3, double 42.4>,
            i32 3,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict")
  ret <4 x double> %powi
}

define <2 x double> @constrained_vector_sin_v2f64() {
; NO-FMA-LABEL: constrained_vector_sin_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_sin_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    addq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
           <2 x double> <double 42.0, double 42.1>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %sin
}

define <4 x double> @constrained_vector_sin_v4f64() {
; NO-FMA-LABEL: constrained_vector_sin_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq sin
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_sin_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq sin
; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    addq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
           <4 x double> <double 42.0, double 42.1,
                         double 42.2, double 42.3>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %sin
}

define <2 x double> @constrained_vector_cos_v2f64() {
; NO-FMA-LABEL: constrained_vector_cos_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    addq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_cos_v2f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    addq $24, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
           <2 x double> <double 42.0, double 42.1>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <2 x double> %cos
}

define <4 x double> @constrained_vector_cos_v4f64() {
; NO-FMA-LABEL: constrained_vector_cos_v4f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq cos
; NO-FMA-NEXT:    movaps %xmm0, %xmm1
; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; NO-FMA-NEXT:    addq $40, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
; NO-FMA-NEXT:    retq
;
; HAS-FMA-LABEL: constrained_vector_cos_v4f64:
; HAS-FMA:       # %bb.0: # %entry
; HAS-FMA-NEXT:    subq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT:    callq cos
; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; HAS-FMA-NEXT:    addq $40, %rsp
; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
; HAS-FMA-NEXT:    retq
entry:
  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
           <4 x double> <double 42.0, double 42.1,
                         double 42.2, double 42.3>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict")
  ret <4 x double> %cos
}

define <2 x double> @constrained_vector_exp_v2f64() {
; NO-FMA-LABEL: constrained_vector_exp_v2f64:
; NO-FMA:       # %bb.0: # %entry
; NO-FMA-NEXT:    subq $24, %rsp
; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; NO-FMA-NEXT:    callq exp
; NO-FMA-NEXT:    movaps %xmm0, (%rsp) #
16-byte Spill 867 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 868 ; NO-FMA-NEXT: callq exp 869 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 870 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 871 ; NO-FMA-NEXT: addq $24, %rsp 872 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 873 ; NO-FMA-NEXT: retq 874 ; 875 ; HAS-FMA-LABEL: constrained_vector_exp_v2f64: 876 ; HAS-FMA: # %bb.0: # %entry 877 ; HAS-FMA-NEXT: subq $24, %rsp 878 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 879 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 880 ; HAS-FMA-NEXT: callq exp 881 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 882 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 883 ; HAS-FMA-NEXT: callq exp 884 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 885 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 886 ; HAS-FMA-NEXT: addq $24, %rsp 887 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 888 ; HAS-FMA-NEXT: retq 889 entry: 890 %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64( 891 <2 x double> <double 42.0, double 42.1>, 892 metadata !"round.dynamic", 893 metadata !"fpexcept.strict") 894 ret <2 x double> %exp 895 } 896 897 define <4 x double> @constrained_vector_exp_v4f64() { 898 ; NO-FMA-LABEL: constrained_vector_exp_v4f64: 899 ; NO-FMA: # %bb.0: # %entry 900 ; NO-FMA-NEXT: subq $40, %rsp 901 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 902 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 903 ; NO-FMA-NEXT: callq exp 904 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 905 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 906 ; NO-FMA-NEXT: callq exp 907 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 908 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 909 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 910 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 911 ; NO-FMA-NEXT: callq exp 912 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 913 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 914 ; NO-FMA-NEXT: callq exp 915 ; 
NO-FMA-NEXT: movaps %xmm0, %xmm1 916 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 917 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 918 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 919 ; NO-FMA-NEXT: addq $40, %rsp 920 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 921 ; NO-FMA-NEXT: retq 922 ; 923 ; HAS-FMA-LABEL: constrained_vector_exp_v4f64: 924 ; HAS-FMA: # %bb.0: # %entry 925 ; HAS-FMA-NEXT: subq $40, %rsp 926 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 48 927 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 928 ; HAS-FMA-NEXT: callq exp 929 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 930 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 931 ; HAS-FMA-NEXT: callq exp 932 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 933 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 934 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 935 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 936 ; HAS-FMA-NEXT: callq exp 937 ; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 938 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 939 ; HAS-FMA-NEXT: callq exp 940 ; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 941 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 942 ; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 943 ; HAS-FMA-NEXT: addq $40, %rsp 944 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 945 ; HAS-FMA-NEXT: retq 946 entry: 947 %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64( 948 <4 x double> <double 42.0, double 42.1, 949 double 42.2, double 42.3>, 950 metadata !"round.dynamic", 951 metadata !"fpexcept.strict") 952 ret <4 x double> %exp 953 } 954 955 define <2 x double> @constrained_vector_exp2_v2f64() { 956 ; NO-FMA-LABEL: constrained_vector_exp2_v2f64: 957 ; NO-FMA: # %bb.0: # %entry 958 ; NO-FMA-NEXT: subq $24, %rsp 959 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 960 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 961 ; 
NO-FMA-NEXT: callq exp2 962 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 963 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 964 ; NO-FMA-NEXT: callq exp2 965 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 966 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 967 ; NO-FMA-NEXT: addq $24, %rsp 968 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 969 ; NO-FMA-NEXT: retq 970 ; 971 ; HAS-FMA-LABEL: constrained_vector_exp2_v2f64: 972 ; HAS-FMA: # %bb.0: # %entry 973 ; HAS-FMA-NEXT: subq $24, %rsp 974 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 975 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 976 ; HAS-FMA-NEXT: callq exp2 977 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 978 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 979 ; HAS-FMA-NEXT: callq exp2 980 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 981 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 982 ; HAS-FMA-NEXT: addq $24, %rsp 983 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 984 ; HAS-FMA-NEXT: retq 985 entry: 986 %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64( 987 <2 x double> <double 42.1, double 42.0>, 988 metadata !"round.dynamic", 989 metadata !"fpexcept.strict") 990 ret <2 x double> %exp2 991 } 992 993 define <4 x double> @constrained_vector_exp2_v4f64() { 994 ; NO-FMA-LABEL: constrained_vector_exp2_v4f64: 995 ; NO-FMA: # %bb.0: # %entry 996 ; NO-FMA-NEXT: subq $40, %rsp 997 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 998 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 999 ; NO-FMA-NEXT: callq exp2 1000 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1001 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1002 ; NO-FMA-NEXT: callq exp2 1003 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1004 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1005 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1006 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1007 ; NO-FMA-NEXT: callq exp2 1008 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1009 ; 
NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1010 ; NO-FMA-NEXT: callq exp2 1011 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1012 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1013 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1014 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1015 ; NO-FMA-NEXT: addq $40, %rsp 1016 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1017 ; NO-FMA-NEXT: retq 1018 ; 1019 ; HAS-FMA-LABEL: constrained_vector_exp2_v4f64: 1020 ; HAS-FMA: # %bb.0: # %entry 1021 ; HAS-FMA-NEXT: subq $40, %rsp 1022 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 48 1023 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1024 ; HAS-FMA-NEXT: callq exp2 1025 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1026 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1027 ; HAS-FMA-NEXT: callq exp2 1028 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1029 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1030 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1031 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1032 ; HAS-FMA-NEXT: callq exp2 1033 ; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1034 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1035 ; HAS-FMA-NEXT: callq exp2 1036 ; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1037 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1038 ; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1039 ; HAS-FMA-NEXT: addq $40, %rsp 1040 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1041 ; HAS-FMA-NEXT: retq 1042 entry: 1043 %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64( 1044 <4 x double> <double 42.1, double 42.2, 1045 double 42.3, double 42.4>, 1046 metadata !"round.dynamic", 1047 metadata !"fpexcept.strict") 1048 ret <4 x double> %exp2 1049 } 1050 1051 define <2 x double> @constrained_vector_log_v2f64() { 1052 ; NO-FMA-LABEL: constrained_vector_log_v2f64: 1053 ; NO-FMA: # %bb.0: # %entry 1054 ; 
NO-FMA-NEXT: subq $24, %rsp 1055 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 1056 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1057 ; NO-FMA-NEXT: callq log 1058 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1059 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1060 ; NO-FMA-NEXT: callq log 1061 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1062 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1063 ; NO-FMA-NEXT: addq $24, %rsp 1064 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1065 ; NO-FMA-NEXT: retq 1066 ; 1067 ; HAS-FMA-LABEL: constrained_vector_log_v2f64: 1068 ; HAS-FMA: # %bb.0: # %entry 1069 ; HAS-FMA-NEXT: subq $24, %rsp 1070 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 1071 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1072 ; HAS-FMA-NEXT: callq log 1073 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1074 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1075 ; HAS-FMA-NEXT: callq log 1076 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1077 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1078 ; HAS-FMA-NEXT: addq $24, %rsp 1079 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1080 ; HAS-FMA-NEXT: retq 1081 entry: 1082 %log = call <2 x double> @llvm.experimental.constrained.log.v2f64( 1083 <2 x double> <double 42.0, double 42.1>, 1084 metadata !"round.dynamic", 1085 metadata !"fpexcept.strict") 1086 ret <2 x double> %log 1087 } 1088 1089 define <4 x double> @constrained_vector_log_v4f64() { 1090 ; NO-FMA-LABEL: constrained_vector_log_v4f64: 1091 ; NO-FMA: # %bb.0: # %entry 1092 ; NO-FMA-NEXT: subq $40, %rsp 1093 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 1094 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1095 ; NO-FMA-NEXT: callq log 1096 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1097 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1098 ; NO-FMA-NEXT: callq log 1099 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1100 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1101 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1102 ; 
NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1103 ; NO-FMA-NEXT: callq log 1104 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1105 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1106 ; NO-FMA-NEXT: callq log 1107 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1108 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1109 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1110 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1111 ; NO-FMA-NEXT: addq $40, %rsp 1112 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1113 ; NO-FMA-NEXT: retq 1114 ; 1115 ; HAS-FMA-LABEL: constrained_vector_log_v4f64: 1116 ; HAS-FMA: # %bb.0: # %entry 1117 ; HAS-FMA-NEXT: subq $40, %rsp 1118 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 48 1119 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1120 ; HAS-FMA-NEXT: callq log 1121 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1122 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1123 ; HAS-FMA-NEXT: callq log 1124 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1125 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1126 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1127 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1128 ; HAS-FMA-NEXT: callq log 1129 ; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1130 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1131 ; HAS-FMA-NEXT: callq log 1132 ; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1133 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1134 ; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1135 ; HAS-FMA-NEXT: addq $40, %rsp 1136 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1137 ; HAS-FMA-NEXT: retq 1138 entry: 1139 %log = call <4 x double> @llvm.experimental.constrained.log.v4f64( 1140 <4 x double> <double 42.0, double 42.1, 1141 double 42.2, double 42.3>, 1142 metadata !"round.dynamic", 1143 metadata !"fpexcept.strict") 1144 ret <4 x double> %log 1145 } 1146 1147 
define <2 x double> @constrained_vector_log10_v2f64() { 1148 ; NO-FMA-LABEL: constrained_vector_log10_v2f64: 1149 ; NO-FMA: # %bb.0: # %entry 1150 ; NO-FMA-NEXT: subq $24, %rsp 1151 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 1152 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1153 ; NO-FMA-NEXT: callq log10 1154 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1155 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1156 ; NO-FMA-NEXT: callq log10 1157 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1158 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1159 ; NO-FMA-NEXT: addq $24, %rsp 1160 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1161 ; NO-FMA-NEXT: retq 1162 ; 1163 ; HAS-FMA-LABEL: constrained_vector_log10_v2f64: 1164 ; HAS-FMA: # %bb.0: # %entry 1165 ; HAS-FMA-NEXT: subq $24, %rsp 1166 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 1167 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1168 ; HAS-FMA-NEXT: callq log10 1169 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1170 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1171 ; HAS-FMA-NEXT: callq log10 1172 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1173 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1174 ; HAS-FMA-NEXT: addq $24, %rsp 1175 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1176 ; HAS-FMA-NEXT: retq 1177 entry: 1178 %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64( 1179 <2 x double> <double 42.0, double 42.1>, 1180 metadata !"round.dynamic", 1181 metadata !"fpexcept.strict") 1182 ret <2 x double> %log10 1183 } 1184 1185 define <4 x double> @constrained_vector_log10_v4f64() { 1186 ; NO-FMA-LABEL: constrained_vector_log10_v4f64: 1187 ; NO-FMA: # %bb.0: # %entry 1188 ; NO-FMA-NEXT: subq $40, %rsp 1189 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 1190 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1191 ; NO-FMA-NEXT: callq log10 1192 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1193 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1194 ; NO-FMA-NEXT: callq log10 1195 
; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1196 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1197 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1198 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1199 ; NO-FMA-NEXT: callq log10 1200 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1201 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1202 ; NO-FMA-NEXT: callq log10 1203 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1204 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1205 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1206 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1207 ; NO-FMA-NEXT: addq $40, %rsp 1208 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1209 ; NO-FMA-NEXT: retq 1210 ; 1211 ; HAS-FMA-LABEL: constrained_vector_log10_v4f64: 1212 ; HAS-FMA: # %bb.0: # %entry 1213 ; HAS-FMA-NEXT: subq $40, %rsp 1214 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 48 1215 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1216 ; HAS-FMA-NEXT: callq log10 1217 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1218 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1219 ; HAS-FMA-NEXT: callq log10 1220 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1221 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1222 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1223 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1224 ; HAS-FMA-NEXT: callq log10 1225 ; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1226 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1227 ; HAS-FMA-NEXT: callq log10 1228 ; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1229 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1230 ; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1231 ; HAS-FMA-NEXT: addq $40, %rsp 1232 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1233 ; HAS-FMA-NEXT: retq 1234 entry: 1235 %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64( 
1236 <4 x double> <double 42.0, double 42.1, 1237 double 42.2, double 42.3>, 1238 metadata !"round.dynamic", 1239 metadata !"fpexcept.strict") 1240 ret <4 x double> %log10 1241 } 1242 1243 define <2 x double> @constrained_vector_log2_v2f64() { 1244 ; NO-FMA-LABEL: constrained_vector_log2_v2f64: 1245 ; NO-FMA: # %bb.0: # %entry 1246 ; NO-FMA-NEXT: subq $24, %rsp 1247 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 1248 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1249 ; NO-FMA-NEXT: callq log2 1250 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1251 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1252 ; NO-FMA-NEXT: callq log2 1253 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1254 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1255 ; NO-FMA-NEXT: addq $24, %rsp 1256 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1257 ; NO-FMA-NEXT: retq 1258 ; 1259 ; HAS-FMA-LABEL: constrained_vector_log2_v2f64: 1260 ; HAS-FMA: # %bb.0: # %entry 1261 ; HAS-FMA-NEXT: subq $24, %rsp 1262 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 32 1263 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1264 ; HAS-FMA-NEXT: callq log2 1265 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1266 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1267 ; HAS-FMA-NEXT: callq log2 1268 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1269 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1270 ; HAS-FMA-NEXT: addq $24, %rsp 1271 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1272 ; HAS-FMA-NEXT: retq 1273 entry: 1274 %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64( 1275 <2 x double> <double 42.0, double 42.1>, 1276 metadata !"round.dynamic", 1277 metadata !"fpexcept.strict") 1278 ret <2 x double> %log2 1279 } 1280 1281 define <4 x double> @constrained_vector_log2_v4f64() { 1282 ; NO-FMA-LABEL: constrained_vector_log2_v4f64: 1283 ; NO-FMA: # %bb.0: # %entry 1284 ; NO-FMA-NEXT: subq $40, %rsp 1285 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 1286 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 
1287 ; NO-FMA-NEXT: callq log2 1288 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1289 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1290 ; NO-FMA-NEXT: callq log2 1291 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1292 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1293 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1294 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1295 ; NO-FMA-NEXT: callq log2 1296 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1297 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1298 ; NO-FMA-NEXT: callq log2 1299 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1300 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1301 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1302 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1303 ; NO-FMA-NEXT: addq $40, %rsp 1304 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1305 ; NO-FMA-NEXT: retq 1306 ; 1307 ; HAS-FMA-LABEL: constrained_vector_log2_v4f64: 1308 ; HAS-FMA: # %bb.0: # %entry 1309 ; HAS-FMA-NEXT: subq $40, %rsp 1310 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 48 1311 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1312 ; HAS-FMA-NEXT: callq log2 1313 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1314 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1315 ; HAS-FMA-NEXT: callq log2 1316 ; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1317 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1318 ; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1319 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1320 ; HAS-FMA-NEXT: callq log2 1321 ; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1322 ; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1323 ; HAS-FMA-NEXT: callq log2 1324 ; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1325 ; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1326 ; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1327 ; HAS-FMA-NEXT: 
addq $40, %rsp 1328 ; HAS-FMA-NEXT: .cfi_def_cfa_offset 8 1329 ; HAS-FMA-NEXT: retq 1330 entry: 1331 %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64( 1332 <4 x double> <double 42.0, double 42.1, 1333 double 42.2, double 42.3>, 1334 metadata !"round.dynamic", 1335 metadata !"fpexcept.strict") 1336 ret <4 x double> %log2 1337 } 1338 1339 define <2 x double> @constrained_vector_rint_v2f64() { 1340 ; NO-FMA-LABEL: constrained_vector_rint_v2f64: 1341 ; NO-FMA: # %bb.0: # %entry 1342 ; NO-FMA-NEXT: subq $24, %rsp 1343 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 1344 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1345 ; NO-FMA-NEXT: callq rint 1346 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1347 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1348 ; NO-FMA-NEXT: callq rint 1349 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1350 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1351 ; NO-FMA-NEXT: addq $24, %rsp 1352 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1353 ; NO-FMA-NEXT: retq 1354 ; 1355 ; HAS-FMA-LABEL: constrained_vector_rint_v2f64: 1356 ; HAS-FMA: # %bb.0: # %entry 1357 ; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %xmm0 1358 ; HAS-FMA-NEXT: retq 1359 entry: 1360 %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( 1361 <2 x double> <double 42.1, double 42.0>, 1362 metadata !"round.dynamic", 1363 metadata !"fpexcept.strict") 1364 ret <2 x double> %rint 1365 } 1366 1367 define <4 x double> @constrained_vector_rint_v4f64() { 1368 ; NO-FMA-LABEL: constrained_vector_rint_v4f64: 1369 ; NO-FMA: # %bb.0: # %entry 1370 ; NO-FMA-NEXT: subq $40, %rsp 1371 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 1372 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1373 ; NO-FMA-NEXT: callq rint 1374 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1375 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1376 ; NO-FMA-NEXT: callq rint 1377 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1378 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1379 ; 
NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1380 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1381 ; NO-FMA-NEXT: callq rint 1382 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1383 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1384 ; NO-FMA-NEXT: callq rint 1385 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1386 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1387 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1388 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1389 ; NO-FMA-NEXT: addq $40, %rsp 1390 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1391 ; NO-FMA-NEXT: retq 1392 ; 1393 ; HAS-FMA-LABEL: constrained_vector_rint_v4f64: 1394 ; HAS-FMA: # %bb.0: # %entry 1395 ; HAS-FMA-NEXT: vroundpd $4, {{.*}}(%rip), %ymm0 1396 ; HAS-FMA-NEXT: retq 1397 entry: 1398 %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( 1399 <4 x double> <double 42.1, double 42.2, 1400 double 42.3, double 42.4>, 1401 metadata !"round.dynamic", 1402 metadata !"fpexcept.strict") 1403 ret <4 x double> %rint 1404 } 1405 1406 define <2 x double> @constrained_vector_nearbyint_v2f64() { 1407 ; NO-FMA-LABEL: constrained_vector_nearbyint_v2f64: 1408 ; NO-FMA: # %bb.0: # %entry 1409 ; NO-FMA-NEXT: subq $24, %rsp 1410 ; NO-FMA-NEXT: .cfi_def_cfa_offset 32 1411 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1412 ; NO-FMA-NEXT: callq nearbyint 1413 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1414 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1415 ; NO-FMA-NEXT: callq nearbyint 1416 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1417 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1418 ; NO-FMA-NEXT: addq $24, %rsp 1419 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1420 ; NO-FMA-NEXT: retq 1421 ; 1422 ; HAS-FMA-LABEL: constrained_vector_nearbyint_v2f64: 1423 ; HAS-FMA: # %bb.0: # %entry 1424 ; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %xmm0 1425 ; HAS-FMA-NEXT: retq 1426 entry: 1427 %nearby = call <2 x double> 
@llvm.experimental.constrained.nearbyint.v2f64( 1428 <2 x double> <double 42.1, double 42.0>, 1429 metadata !"round.dynamic", 1430 metadata !"fpexcept.strict") 1431 ret <2 x double> %nearby 1432 } 1433 1434 define <4 x double> @constrained_vector_nearbyint_v4f64() { 1435 ; NO-FMA-LABEL: constrained_vector_nearbyint_v4f64: 1436 ; NO-FMA: # %bb.0: # %entry 1437 ; NO-FMA-NEXT: subq $40, %rsp 1438 ; NO-FMA-NEXT: .cfi_def_cfa_offset 48 1439 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1440 ; NO-FMA-NEXT: callq nearbyint 1441 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1442 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1443 ; NO-FMA-NEXT: callq nearbyint 1444 ; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1445 ; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0] 1446 ; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1447 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1448 ; NO-FMA-NEXT: callq nearbyint 1449 ; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1450 ; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1451 ; NO-FMA-NEXT: callq nearbyint 1452 ; NO-FMA-NEXT: movaps %xmm0, %xmm1 1453 ; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1454 ; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0] 1455 ; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1456 ; NO-FMA-NEXT: addq $40, %rsp 1457 ; NO-FMA-NEXT: .cfi_def_cfa_offset 8 1458 ; NO-FMA-NEXT: retq 1459 ; 1460 ; HAS-FMA-LABEL: constrained_vector_nearbyint_v4f64: 1461 ; HAS-FMA: # %bb.0: # %entry 1462 ; HAS-FMA-NEXT: vroundpd $12, {{.*}}(%rip), %ymm0 1463 ; HAS-FMA-NEXT: retq 1464 entry: 1465 %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( 1466 <4 x double> <double 42.1, double 42.2, 1467 double 42.3, double 42.4>, 1468 metadata !"round.dynamic", 1469 metadata !"fpexcept.strict") 1470 ret <4 x double> %nearby 1471 } 1472 1473 ; Single width declarations 1474 declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 
x double>, metadata, metadata) 1475 declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) 1476 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) 1477 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) 1478 declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata) 1479 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) 1480 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) 1481 declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata) 1482 declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) 1483 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) 1484 declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) 1485 declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) 1486 declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) 1487 declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) 1488 declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata) 1489 declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata) 1490 declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) 1491 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) 1492 1493 ; Double width declarations 1494 declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 
x double>, metadata, metadata) 1495 declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata) 1496 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) 1497 declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata) 1498 declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata) 1499 declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata) 1500 declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) 1501 declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata) 1502 declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) 1503 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) 1504 declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) 1505 declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) 1506 declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) 1507 declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) 1508 declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata) 1509 declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata) 1510 declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata) 1511 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) 1512