Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
        ; Two llc configurations are exercised for x86_64-pc-linux at -O3.
        ; The first uses the default feature set and is checked with the prefixes
        ; COMMON, NO-FMA, FMACALL64 and FMACALL32. The second adds -mattr=+fma and is
        ; checked with the prefixes COMMON, HAS-FMA, FMA64 and FMA32.
      2 ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
      3 ; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck --check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
      4 
      5 define <2 x double> @constrained_vector_fdiv_v2f64() {
        ; Constrained fdiv of the constant vector <1.0, 2.0> by <10.0, 10.0>, using
        ; the "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to load the dividend into xmm0 and emit a
        ; single packed divide (divpd / vdivpd) whose divisor is a RIP-relative
        ; memory operand.
      6 ; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
      7 ; NO-FMA:       # %bb.0: # %entry
      8 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
      9 ; NO-FMA-NEXT:    divpd {{.*}}(%rip), %xmm0
     10 ; NO-FMA-NEXT:    retq
     11 ;
     12 ; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
     13 ; HAS-FMA:       # %bb.0: # %entry
     14 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
     15 ; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
     16 ; HAS-FMA-NEXT:    retq
     17 entry:
     18   %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
     19            <2 x double> <double 1.000000e+00, double 2.000000e+00>,
     20            <2 x double> <double 1.000000e+01, double 1.000000e+01>,
     21            metadata !"round.dynamic",
     22            metadata !"fpexcept.strict")
     23   ret <2 x double> %div
     24 }
     25 
     26 define <4 x double> @constrained_vector_fdiv_v4f64() {
        ; Constrained fdiv of the constant vector <1.0, 2.0, 3.0, 4.0> by a splat of
        ; 10.0, using the "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma the expected code splits the operation into two 128-bit divpd
        ; instructions that share the divisor in xmm2 and leave the result in
        ; xmm0 and xmm1. With +fma a single 256-bit vdivpd on ymm0 is expected.
     27 ; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
     28 ; NO-FMA:       # %bb.0: # %entry
     29 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm2 = [1.000000e+01,1.000000e+01]
     30 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
     31 ; NO-FMA-NEXT:    divpd %xmm2, %xmm0
     32 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [3.000000e+00,4.000000e+00]
     33 ; NO-FMA-NEXT:    divpd %xmm2, %xmm1
     34 ; NO-FMA-NEXT:    retq
     35 ;
     36 ; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
     37 ; HAS-FMA:       # %bb.0: # %entry
     38 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
     39 ; HAS-FMA-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
     40 ; HAS-FMA-NEXT:    retq
        entry:
     41   %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
     42            <4 x double> <double 1.000000e+00, double 2.000000e+00,
     43                          double 3.000000e+00, double 4.000000e+00>,
     44            <4 x double> <double 1.000000e+01, double 1.000000e+01,
     45                          double 1.000000e+01, double 1.000000e+01>,
     46            metadata !"round.dynamic",
     47            metadata !"fpexcept.strict")
     48   ret <4 x double> %div
     49 }
     50 
     51 define <2 x double> @constrained_vector_fmul_v2f64() {
        ; Constrained fmul of a splat of 0x7FEFFFFFFFFFFFFF (shown as 1.797693e+308
        ; in the expected assembly comments) by <2.0, 3.0>, using the "round.dynamic"
        ; and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to emit one packed multiply
        ; (mulpd / vmulpd) on xmm0 with a RIP-relative memory operand.
     52 ; NO-FMA-LABEL: constrained_vector_fmul_v2f64:
     53 ; NO-FMA:       # %bb.0: # %entry
     54 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
     55 ; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm0
     56 ; NO-FMA-NEXT:    retq
     57 ;
     58 ; HAS-FMA-LABEL: constrained_vector_fmul_v2f64:
     59 ; HAS-FMA:       # %bb.0: # %entry
     60 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
     61 ; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %xmm0, %xmm0
     62 ; HAS-FMA-NEXT:    retq
     63 entry:
     64   %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
     65            <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
     66            <2 x double> <double 2.000000e+00, double 3.000000e+00>,
     67            metadata !"round.dynamic",
     68            metadata !"fpexcept.strict")
     69   ret <2 x double> %mul
     70 }
     71 
     72 define <4 x double> @constrained_vector_fmul_v4f64() {
        ; Constrained fmul of a splat of 0x7FEFFFFFFFFFFFFF (shown as 1.797693e+308
        ; in the expected assembly comments) by <2.0, 3.0, 4.0, 5.0>, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma two 128-bit mulpd instructions are expected, with the result
        ; halves in xmm0 and xmm1. With +fma a single 256-bit vmulpd on ymm0 is
        ; expected.
     73 ; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
     74 ; NO-FMA:       # %bb.0: # %entry
     75 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
     76 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [2.000000e+00,3.000000e+00]
     77 ; NO-FMA-NEXT:    mulpd %xmm1, %xmm0
     78 ; NO-FMA-NEXT:    mulpd {{.*}}(%rip), %xmm1
     79 ; NO-FMA-NEXT:    retq
     80 ;
     81 ; HAS-FMA-LABEL: constrained_vector_fmul_v4f64:
     82 ; HAS-FMA:       # %bb.0: # %entry
     83 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
     84 ; HAS-FMA-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
     85 ; HAS-FMA-NEXT:    retq
     86 entry:
     87   %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
     88            <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
     89                          double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
     90            <4 x double> <double 2.000000e+00, double 3.000000e+00,
     91                          double 4.000000e+00, double 5.000000e+00>,
     92            metadata !"round.dynamic",
     93            metadata !"fpexcept.strict")
     94   ret <4 x double> %mul
     95 }
     96 
     97 
     98 define <2 x double> @constrained_vector_fadd_v2f64() {
        ; Constrained fadd of a splat of 0x7FEFFFFFFFFFFFFF (shown as 1.797693e+308
        ; in the expected assembly comments) and <1.0, 0.1>, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to emit one packed add (addpd / vaddpd)
        ; on xmm0 with a RIP-relative memory operand.
     99 ; NO-FMA-LABEL: constrained_vector_fadd_v2f64:
    100 ; NO-FMA:       # %bb.0: # %entry
    101 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
    102 ; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm0
    103 ; NO-FMA-NEXT:    retq
    104 ;
    105 ; HAS-FMA-LABEL: constrained_vector_fadd_v2f64:
    106 ; HAS-FMA:       # %bb.0: # %entry
    107 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
    108 ; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
    109 ; HAS-FMA-NEXT:    retq
    110 entry:
    111   %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
    112            <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
    113            <2 x double> <double 1.000000e+00, double 1.000000e-01>,
    114            metadata !"round.dynamic",
    115            metadata !"fpexcept.strict")
    116   ret <2 x double> %add
    117 }
    118 
    119 define <4 x double> @constrained_vector_fadd_v4f64() {
        ; Constrained fadd of a splat of 0x7FEFFFFFFFFFFFFF (shown as 1.797693e+308
        ; in the expected assembly comments) and <1.0, 0.1, 2.0, 0.2>, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma two 128-bit addpd instructions are expected, with the result
        ; halves in xmm0 and xmm1. With +fma a single 256-bit vaddpd on ymm0 is
        ; expected.
    120 ; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
    121 ; NO-FMA:       # %bb.0: # %entry
    122 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
    123 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,1.000000e-01]
    124 ; NO-FMA-NEXT:    addpd %xmm1, %xmm0
    125 ; NO-FMA-NEXT:    addpd {{.*}}(%rip), %xmm1
    126 ; NO-FMA-NEXT:    retq
    127 ;
    128 ; HAS-FMA-LABEL: constrained_vector_fadd_v4f64:
    129 ; HAS-FMA:       # %bb.0: # %entry
    130 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
    131 ; HAS-FMA-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
    132 ; HAS-FMA-NEXT:    retq
    133 entry:
    134   %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
    135            <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
    136                          double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
    137            <4 x double> <double 1.000000e+00, double 1.000000e-01,
    138                          double 2.000000e+00, double 2.000000e-01>,
    139            metadata !"round.dynamic",
    140            metadata !"fpexcept.strict")
    141   ret <4 x double> %add
    142 }
    143 
    144 define <2 x double> @constrained_vector_fsub_v2f64() {
        ; Constrained fsub of <1.0, 0.1> from a splat of 0xFFEFFFFFFFFFFFFF (shown as
        ; -1.797693e+308 in the expected assembly comments), using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to emit one packed subtract
        ; (subpd / vsubpd) on xmm0 with a RIP-relative memory operand.
    145 ; NO-FMA-LABEL: constrained_vector_fsub_v2f64:
    146 ; NO-FMA:       # %bb.0: # %entry
    147 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
    148 ; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
    149 ; NO-FMA-NEXT:    retq
    150 ;
    151 ; HAS-FMA-LABEL: constrained_vector_fsub_v2f64:
    152 ; HAS-FMA:       # %bb.0: # %entry
    153 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
    154 ; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %xmm0, %xmm0
    155 ; HAS-FMA-NEXT:    retq
    156 entry:
    157   %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
    158            <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
    159            <2 x double> <double 1.000000e+00, double 1.000000e-01>,
    160            metadata !"round.dynamic",
    161            metadata !"fpexcept.strict")
    162   ret <2 x double> %sub
    163 }
    164 
    165 define <4 x double> @constrained_vector_fsub_v4f64() {
        ; Constrained fsub of <1.0, 0.1, 2.0, 0.2> from a splat of 0xFFEFFFFFFFFFFFFF
        ; (shown as -1.797693e+308 in the expected assembly comments), using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma the minuend constant is loaded once into xmm1, copied to xmm0,
        ; and two 128-bit subpd instructions with memory operands are expected.
        ; With +fma a single 256-bit vsubpd on ymm0 is expected.
    166 ; NO-FMA-LABEL: constrained_vector_fsub_v4f64:
    167 ; NO-FMA:       # %bb.0: # %entry
    168 ; NO-FMA-NEXT:    movapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
    169 ; NO-FMA-NEXT:    movapd %xmm1, %xmm0
    170 ; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm0
    171 ; NO-FMA-NEXT:    subpd {{.*}}(%rip), %xmm1
    172 ; NO-FMA-NEXT:    retq
    173 ;
    174 ; HAS-FMA-LABEL: constrained_vector_fsub_v4f64:
    175 ; HAS-FMA:       # %bb.0: # %entry
    176 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [-1.797693e+308,-1.797693e+308,-1.797693e+308,-1.797693e+308]
    177 ; HAS-FMA-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
    178 ; HAS-FMA-NEXT:    retq
    179 entry:
    180   %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
    181            <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
    182                          double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
    183            <4 x double> <double 1.000000e+00, double 1.000000e-01,
    184                          double 2.000000e+00, double 2.000000e-01>,
    185            metadata !"round.dynamic",
    186            metadata !"fpexcept.strict")
    187   ret <4 x double> %sub
    188 }
    189 
    189 define <2 x double> @constrained_vector_fma_v2f64() {
        ; Constrained fma on constant <2 x double> operands <1.5, 0.5>, <3.5, 2.5>
        ; and <5.5, 4.5>, using the "round.dynamic" and "fpexcept.strict" metadata
        ; arguments.
        ; Without +fma the expected code makes two scalar calls to fma, spilling
        ; the first result to the stack and recombining with unpcklpd.
        ; With +fma a single vfmadd213pd on xmm registers is expected.
    190 ; NO-FMA-LABEL: constrained_vector_fma_v2f64:
    191 ; NO-FMA:       # %bb.0: # %entry
    192 ; NO-FMA-NEXT:    subq $24, %rsp
    193 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    194 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    195 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    196 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    197 ; NO-FMA-NEXT:    callq fma
    198 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    199 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    200 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    201 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    202 ; NO-FMA-NEXT:    callq fma
    203 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    204 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    205 ; NO-FMA-NEXT:    addq $24, %rsp
    206 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    207 ; NO-FMA-NEXT:    retq
    208 ;
    209 ; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
    210 ; HAS-FMA:       # %bb.0: # %entry
    211 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm1 = [1.500000e+00,5.000000e-01]
    212 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} xmm0 = [3.500000e+00,2.500000e+00]
    213 ; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
    214 ; HAS-FMA-NEXT:    retq
    215 entry:
    216   %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
    217            <2 x double> <double 1.5, double 0.5>,
    218            <2 x double> <double 3.5, double 2.5>,
    219            <2 x double> <double 5.5, double 4.5>,
    220            metadata !"round.dynamic",
    221            metadata !"fpexcept.strict")
    222   ret <2 x double> %fma
    223 }
    225 
    226 define <4 x double> @constrained_vector_fma_v4f64() {
        ; Constrained fma on constant <4 x double> operands, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma the expected code makes four scalar calls to fma, pairs the
        ; results with unpcklpd, and returns the two halves in xmm0 and xmm1.
        ; With +fma a single vfmadd213pd on ymm registers is expected.
    227 ; NO-FMA-LABEL: constrained_vector_fma_v4f64:
    228 ; NO-FMA:       # %bb.0: # %entry
    229 ; NO-FMA-NEXT:    subq $40, %rsp
    230 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    231 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    232 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    233 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    234 ; NO-FMA-NEXT:    callq fma
    235 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    236 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    237 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    238 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    239 ; NO-FMA-NEXT:    callq fma
    240 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    241 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    242 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    243 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    244 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    245 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    246 ; NO-FMA-NEXT:    callq fma
    247 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    248 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    249 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    250 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
    251 ; NO-FMA-NEXT:    callq fma
    252 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    253 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    254 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    255 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    256 ; NO-FMA-NEXT:    addq $40, %rsp
    257 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    258 ; NO-FMA-NEXT:    retq
    259 ;
    260 ; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
    261 ; HAS-FMA:       # %bb.0: # %entry
    262 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
    263 ; HAS-FMA-NEXT:    vmovapd {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
    264 ; HAS-FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
    265 ; HAS-FMA-NEXT:    retq
    266 entry:
    267   %fma = call <4 x double> @llvm.experimental.constrained.fma.v4f64(
    268            <4 x double> <double 3.5, double 2.5, double 1.5, double 0.5>,
    269            <4 x double> <double 7.5, double 6.5, double 5.5, double 4.5>,
    270            <4 x double> <double 11.5, double 10.5, double 9.5, double 8.5>,
    271            metadata !"round.dynamic",
    272            metadata !"fpexcept.strict")
    273   ret <4 x double> %fma
    274 }
    275 
    276 define <4 x float> @constrained_vector_fma_v4f32() {
        ; Constrained fma on constant <4 x float> operands, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma the expected code makes four scalar calls to fmaf and
        ; rebuilds the vector in xmm0 with unpcklps followed by unpcklpd.
        ; With +fma a single vfmadd213ps on xmm registers is expected.
    277 ; NO-FMA-LABEL: constrained_vector_fma_v4f32:
    278 ; NO-FMA:       # %bb.0: # %entry
    279 ; NO-FMA-NEXT:    subq $40, %rsp
    280 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    281 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    282 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    283 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    284 ; NO-FMA-NEXT:    callq fmaf
    285 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    286 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    287 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    288 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    289 ; NO-FMA-NEXT:    callq fmaf
    290 ; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
    291 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    292 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    293 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    294 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    295 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    296 ; NO-FMA-NEXT:    callq fmaf
    297 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    298 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    299 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    300 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    301 ; NO-FMA-NEXT:    callq fmaf
    302 ; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
    303 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    304 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    305 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    306 ; NO-FMA-NEXT:    addq $40, %rsp
    307 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    308 ; NO-FMA-NEXT:    retq
    309 ;
    310 ; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
    311 ; HAS-FMA:       # %bb.0: # %entry
    312 ; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
    313 ; HAS-FMA-NEXT:    vmovaps {{.*#+}} xmm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
    314 ; HAS-FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
    315 ; HAS-FMA-NEXT:    retq
    316 entry:
    317   %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
    318            <4 x float> <float 3.5, float 2.5, float 1.5, float 0.5>,
    319            <4 x float> <float 7.5, float 6.5, float 5.5, float 4.5>,
    320            <4 x float> <float 11.5, float 10.5, float 9.5, float 8.5>,
    321            metadata !"round.dynamic",
    322            metadata !"fpexcept.strict")
    323   ret <4 x float> %fma
    324 }
    325 
    326 define <8 x float> @constrained_vector_fma_v8f32() {
        ; Constrained fma on constant <8 x float> operands, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma the expected code makes eight scalar calls to fmaf, merges
        ; the results with unpcklps and unpcklpd through stack spill slots, and
        ; returns the two 128-bit halves in xmm0 and xmm1.
        ; With +fma a single vfmadd213ps on ymm registers is expected.
    327 ; NO-FMA-LABEL: constrained_vector_fma_v8f32:
    328 ; NO-FMA:       # %bb.0: # %entry
    329 ; NO-FMA-NEXT:    subq $56, %rsp
    330 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 64
    331 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    332 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    333 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    334 ; NO-FMA-NEXT:    callq fmaf
    335 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    336 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    337 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    338 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    339 ; NO-FMA-NEXT:    callq fmaf
    340 ; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
    341 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    342 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    343 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    344 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    345 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    346 ; NO-FMA-NEXT:    callq fmaf
    347 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    348 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    349 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    350 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    351 ; NO-FMA-NEXT:    callq fmaf
    352 ; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
    353 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    354 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
    355 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    356 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    357 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    358 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    359 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    360 ; NO-FMA-NEXT:    callq fmaf
    361 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    362 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    363 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    364 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    365 ; NO-FMA-NEXT:    callq fmaf
    366 ; NO-FMA-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
    367 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
    368 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    369 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    370 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    371 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    372 ; NO-FMA-NEXT:    callq fmaf
    373 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    374 ; NO-FMA-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    375 ; NO-FMA-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    376 ; NO-FMA-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    377 ; NO-FMA-NEXT:    callq fmaf
    378 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    379 ; NO-FMA-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    380 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
    381 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
    382 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    383 ; NO-FMA-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
    384 ; NO-FMA-NEXT:    addq $56, %rsp
    385 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    386 ; NO-FMA-NEXT:    retq
    387 ;
    388 ; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
    389 ; HAS-FMA:       # %bb.0: # %entry
    390 ; HAS-FMA-NEXT:    vmovaps {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01,7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
    391 ; HAS-FMA-NEXT:    vmovaps {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00,1.150000e+01,1.050000e+01,9.500000e+00,8.500000e+00]
    392 ; HAS-FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
    393 ; HAS-FMA-NEXT:    retq
    394 entry:
    395   %fma = call <8 x float> @llvm.experimental.constrained.fma.v8f32(
    396            <8 x float> <float 3.5, float 2.5, float 1.5, float 0.5,
    397                         float 7.5, float 6.5, float 5.5, float 4.5>,
    398            <8 x float> <float 7.5, float 6.5, float 5.5, float 4.5,
    399                         float 11.5, float 10.5, float 9.5, float 8.5>,
    400            <8 x float> <float 11.5, float 10.5, float 9.5, float 8.5,
    401                         float 15.5, float 14.5, float 13.5, float 12.5>,
    402            metadata !"round.dynamic",
    403            metadata !"fpexcept.strict")
    404   ret <8 x float> %fma
    405 }
    406 
    407 define <2 x double> @constrained_vector_sqrt_v2f64() {
        ; Constrained sqrt of the constant vector <42.0, 42.1>, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to emit one packed square root
        ; (sqrtpd / vsqrtpd) that reads its input from a RIP-relative memory operand
        ; and writes xmm0.
    408 ; NO-FMA-LABEL: constrained_vector_sqrt_v2f64:
    409 ; NO-FMA:       # %bb.0: # %entry
    410 ; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
    411 ; NO-FMA-NEXT:    retq
    412 ;
    413 ; HAS-FMA-LABEL: constrained_vector_sqrt_v2f64:
    414 ; HAS-FMA:       # %bb.0: # %entry
    415 ; HAS-FMA-NEXT:    vsqrtpd {{.*}}(%rip), %xmm0
    416 ; HAS-FMA-NEXT:    retq
    417 entry:
    418   %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
    419                               <2 x double> <double 42.0, double 42.1>,
    420                               metadata !"round.dynamic",
    421                               metadata !"fpexcept.strict")
    422   ret <2 x double> %sqrt
    423 }
    424 
    425 define <4 x double> @constrained_vector_sqrt_v4f64() {
        ; Constrained sqrt of the constant vector <42.0, 42.1, 42.2, 42.3>, using the
        ; "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Without +fma two 128-bit sqrtpd instructions writing xmm0 and xmm1 are
        ; expected. With +fma a single 256-bit vsqrtpd writing ymm0 is expected.
    426 ; NO-FMA-LABEL: constrained_vector_sqrt_v4f64:
    427 ; NO-FMA:       # %bb.0: # %entry
    428 ; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
    429 ; NO-FMA-NEXT:    sqrtpd {{.*}}(%rip), %xmm1
    430 ; NO-FMA-NEXT:    retq
    431 ;
    432 ; HAS-FMA-LABEL: constrained_vector_sqrt_v4f64:
    433 ; HAS-FMA:       # %bb.0: # %entry
    434 ; HAS-FMA-NEXT:    vsqrtpd {{.*}}(%rip), %ymm0
    435 ; HAS-FMA-NEXT:    retq
    436 entry:
    437   %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
    438                               <4 x double> <double 42.0, double 42.1,
    439                                             double 42.2, double 42.3>,
    440                               metadata !"round.dynamic",
    441                               metadata !"fpexcept.strict")
    442   ret <4 x double> %sqrt
    443 }
    444 
    444 define <2 x double> @constrained_vector_pow_v2f64() {
        ; Constrained pow of the constant vector <42.1, 42.2> raised to a splat of
        ; 3.0, using the "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to make two scalar calls to pow, spill
        ; the first result to the stack, and recombine with unpcklpd / vunpcklpd.
    445 ; NO-FMA-LABEL: constrained_vector_pow_v2f64:
    446 ; NO-FMA:       # %bb.0: # %entry
    447 ; NO-FMA-NEXT:    subq $24, %rsp
    448 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    449 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    450 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    451 ; NO-FMA-NEXT:    callq pow
    452 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    453 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    454 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    455 ; NO-FMA-NEXT:    callq pow
    456 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    457 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    458 ; NO-FMA-NEXT:    addq $24, %rsp
    459 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    460 ; NO-FMA-NEXT:    retq
    461 ;
    462 ; HAS-FMA-LABEL: constrained_vector_pow_v2f64:
    463 ; HAS-FMA:       # %bb.0: # %entry
    464 ; HAS-FMA-NEXT:    subq $24, %rsp
    465 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    466 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    467 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    468 ; HAS-FMA-NEXT:    callq pow
    469 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    470 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    471 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    472 ; HAS-FMA-NEXT:    callq pow
    473 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    474 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    475 ; HAS-FMA-NEXT:    addq $24, %rsp
    476 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    477 ; HAS-FMA-NEXT:    retq
    478 entry:
    479   %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
    480                              <2 x double> <double 42.1, double 42.2>,
    481                              <2 x double> <double 3.0, double 3.0>,
    482                              metadata !"round.dynamic",
    483                              metadata !"fpexcept.strict")
    484   ret <2 x double> %pow
    485 }
    487 
    488 define <4 x double> @constrained_vector_pow_v4f64() {
        ; Constrained pow of the constant vector <42.1, 42.2, 42.3, 42.4> raised to a
        ; splat of 3.0, using the "round.dynamic" and "fpexcept.strict" metadata
        ; arguments.
        ; Both configurations are expected to make four scalar calls to pow.
        ; Without +fma the paired results are returned in xmm0 and xmm1. With +fma
        ; the two 128-bit halves are joined into ymm0 with vinsertf128.
    489 ; NO-FMA-LABEL: constrained_vector_pow_v4f64:
    490 ; NO-FMA:       # %bb.0: # %entry
    491 ; NO-FMA-NEXT:    subq $40, %rsp
    492 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    493 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    494 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    495 ; NO-FMA-NEXT:    callq pow
    496 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    497 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    498 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    499 ; NO-FMA-NEXT:    callq pow
    500 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    501 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    502 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    503 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    504 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    505 ; NO-FMA-NEXT:    callq pow
    506 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    507 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    508 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
    509 ; NO-FMA-NEXT:    callq pow
    510 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    511 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    512 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    513 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    514 ; NO-FMA-NEXT:    addq $40, %rsp
    515 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    516 ; NO-FMA-NEXT:    retq
    517 ;
    518 ; HAS-FMA-LABEL: constrained_vector_pow_v4f64:
    519 ; HAS-FMA:       # %bb.0: # %entry
    520 ; HAS-FMA-NEXT:    subq $40, %rsp
    521 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
    522 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    523 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    524 ; HAS-FMA-NEXT:    callq pow
    525 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    526 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    527 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    528 ; HAS-FMA-NEXT:    callq pow
    529 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    530 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    531 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    532 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    533 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    534 ; HAS-FMA-NEXT:    callq pow
    535 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    536 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    537 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    538 ; HAS-FMA-NEXT:    callq pow
    539 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
    540 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    541 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
    542 ; HAS-FMA-NEXT:    addq $40, %rsp
    543 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    544 ; HAS-FMA-NEXT:    retq
    545 entry:
    546   %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
    547                              <4 x double> <double 42.1, double 42.2,
    548                                            double 42.3, double 42.4>,
    549                              <4 x double> <double 3.0, double 3.0,
    550                                            double 3.0, double 3.0>,
    551                              metadata !"round.dynamic",
    552                              metadata !"fpexcept.strict")
    553   ret <4 x double> %pow
    554 }
    555 
    556 define <2 x double> @constrained_vector_powi_v2f64() {
        ; Constrained powi of the constant vector <42.1, 42.2> with the i32 exponent
        ; 3, using the "round.dynamic" and "fpexcept.strict" metadata arguments.
        ; Both configurations are expected to make two scalar calls to __powidf2
        ; with the exponent passed in edi, spill the first result to the stack, and
        ; recombine with unpcklpd / vunpcklpd.
    557 ; NO-FMA-LABEL: constrained_vector_powi_v2f64:
    558 ; NO-FMA:       # %bb.0: # %entry
    559 ; NO-FMA-NEXT:    subq $24, %rsp
    560 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    561 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    562 ; NO-FMA-NEXT:    movl $3, %edi
    563 ; NO-FMA-NEXT:    callq __powidf2
    564 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    565 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    566 ; NO-FMA-NEXT:    movl $3, %edi
    567 ; NO-FMA-NEXT:    callq __powidf2
    568 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    569 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    570 ; NO-FMA-NEXT:    addq $24, %rsp
    571 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    572 ; NO-FMA-NEXT:    retq
    573 ;
    574 ; HAS-FMA-LABEL: constrained_vector_powi_v2f64:
    575 ; HAS-FMA:       # %bb.0: # %entry
    576 ; HAS-FMA-NEXT:    subq $24, %rsp
    577 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    578 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    579 ; HAS-FMA-NEXT:    movl $3, %edi
    580 ; HAS-FMA-NEXT:    callq __powidf2
    581 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    582 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    583 ; HAS-FMA-NEXT:    movl $3, %edi
    584 ; HAS-FMA-NEXT:    callq __powidf2
    585 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    586 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    587 ; HAS-FMA-NEXT:    addq $24, %rsp
    588 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    589 ; HAS-FMA-NEXT:    retq
    590 entry:
    591   %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
    592                               <2 x double> <double 42.1, double 42.2>,
    593                               i32 3,
    594                               metadata !"round.dynamic",
    595                               metadata !"fpexcept.strict")
    596   ret <2 x double> %powi
    597 }
    598 
    ; Lowering test for llvm.experimental.constrained.powi on a constant
    ; <4 x double> operand with exponent i32 3 and round.dynamic /
    ; fpexcept.strict metadata.
    ; Both run configurations are expected to emit four scalar
    ; `callq __powidf2` libcalls, each preceded by `movl $3, %edi` for the
    ; exponent, spilling intermediate results to the stack between calls.
    ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
    ; with +fma the halves are merged into ymm0 by vinsertf128.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    599 define <4 x double> @constrained_vector_powi_v4f64() {
    600 ; NO-FMA-LABEL: constrained_vector_powi_v4f64:
    601 ; NO-FMA:       # %bb.0: # %entry
    602 ; NO-FMA-NEXT:    subq $40, %rsp
    603 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    604 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    605 ; NO-FMA-NEXT:    movl $3, %edi
    606 ; NO-FMA-NEXT:    callq __powidf2
    607 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    608 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    609 ; NO-FMA-NEXT:    movl $3, %edi
    610 ; NO-FMA-NEXT:    callq __powidf2
    611 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    612 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    613 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    614 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    615 ; NO-FMA-NEXT:    movl $3, %edi
    616 ; NO-FMA-NEXT:    callq __powidf2
    617 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    618 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    619 ; NO-FMA-NEXT:    movl $3, %edi
    620 ; NO-FMA-NEXT:    callq __powidf2
    621 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    622 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    623 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    624 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    625 ; NO-FMA-NEXT:    addq $40, %rsp
    626 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    627 ; NO-FMA-NEXT:    retq
    628 ;
    629 ; HAS-FMA-LABEL: constrained_vector_powi_v4f64:
    630 ; HAS-FMA:       # %bb.0: # %entry
    631 ; HAS-FMA-NEXT:    subq $40, %rsp
    632 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
    633 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    634 ; HAS-FMA-NEXT:    movl $3, %edi
    635 ; HAS-FMA-NEXT:    callq __powidf2
    636 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    637 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    638 ; HAS-FMA-NEXT:    movl $3, %edi
    639 ; HAS-FMA-NEXT:    callq __powidf2
    640 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    641 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    642 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    643 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    644 ; HAS-FMA-NEXT:    movl $3, %edi
    645 ; HAS-FMA-NEXT:    callq __powidf2
    646 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    647 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    648 ; HAS-FMA-NEXT:    movl $3, %edi
    649 ; HAS-FMA-NEXT:    callq __powidf2
    650 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
    651 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    652 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
    653 ; HAS-FMA-NEXT:    addq $40, %rsp
    654 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    655 ; HAS-FMA-NEXT:    retq
    656 entry:
    657   %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
    658                               <4 x double> <double 42.1, double 42.2,
    659                                             double 42.3, double 42.4>,
    660                               i32 3,
    661                               metadata !"round.dynamic",
    662                               metadata !"fpexcept.strict")
    663   ret <4 x double> %powi
    664 }
    665 
    666 
    ; Lowering test for llvm.experimental.constrained.sin on the constant
    ; <2 x double> operand <42.0, 42.1> with round.dynamic / fpexcept.strict
    ; metadata.
    ; Both run configurations are expected to emit two scalar `callq sin`
    ; libcalls: the first result is spilled to (%rsp) across the second call,
    ; then the two results are recombined into xmm0 with (v)unpcklpd.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    667 define <2 x double> @constrained_vector_sin_v2f64() {
    668 ; NO-FMA-LABEL: constrained_vector_sin_v2f64:
    669 ; NO-FMA:       # %bb.0: # %entry
    670 ; NO-FMA-NEXT:    subq $24, %rsp
    671 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    672 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    673 ; NO-FMA-NEXT:    callq sin
    674 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    675 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    676 ; NO-FMA-NEXT:    callq sin
    677 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    678 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    679 ; NO-FMA-NEXT:    addq $24, %rsp
    680 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    681 ; NO-FMA-NEXT:    retq
    682 ;
    683 ; HAS-FMA-LABEL: constrained_vector_sin_v2f64:
    684 ; HAS-FMA:       # %bb.0: # %entry
    685 ; HAS-FMA-NEXT:    subq $24, %rsp
    686 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    687 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    688 ; HAS-FMA-NEXT:    callq sin
    689 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    690 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    691 ; HAS-FMA-NEXT:    callq sin
    692 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    693 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    694 ; HAS-FMA-NEXT:    addq $24, %rsp
    695 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    696 ; HAS-FMA-NEXT:    retq
    697 entry:
    698   %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
    699                              <2 x double> <double 42.0, double 42.1>,
    700                              metadata !"round.dynamic",
    701                              metadata !"fpexcept.strict")
    702   ret <2 x double> %sin
    703 }
    704 
    ; Lowering test for llvm.experimental.constrained.sin on the constant
    ; <4 x double> operand <42.0, 42.1, 42.2, 42.3> with round.dynamic /
    ; fpexcept.strict metadata.
    ; Both run configurations are expected to emit four scalar `callq sin`
    ; libcalls, spilling intermediate results to the stack between calls.
    ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
    ; with +fma the halves are merged into ymm0 by vinsertf128.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    705 define <4 x double> @constrained_vector_sin_v4f64() {
    706 ; NO-FMA-LABEL: constrained_vector_sin_v4f64:
    707 ; NO-FMA:       # %bb.0: # %entry
    708 ; NO-FMA-NEXT:    subq $40, %rsp
    709 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    710 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    711 ; NO-FMA-NEXT:    callq sin
    712 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    713 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    714 ; NO-FMA-NEXT:    callq sin
    715 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    716 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    717 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    718 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    719 ; NO-FMA-NEXT:    callq sin
    720 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    721 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    722 ; NO-FMA-NEXT:    callq sin
    723 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    724 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    725 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    726 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    727 ; NO-FMA-NEXT:    addq $40, %rsp
    728 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    729 ; NO-FMA-NEXT:    retq
    730 ;
    731 ; HAS-FMA-LABEL: constrained_vector_sin_v4f64:
    732 ; HAS-FMA:       # %bb.0: # %entry
    733 ; HAS-FMA-NEXT:    subq $40, %rsp
    734 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
    735 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    736 ; HAS-FMA-NEXT:    callq sin
    737 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    738 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    739 ; HAS-FMA-NEXT:    callq sin
    740 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    741 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    742 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    743 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    744 ; HAS-FMA-NEXT:    callq sin
    745 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    746 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    747 ; HAS-FMA-NEXT:    callq sin
    748 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
    749 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    750 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
    751 ; HAS-FMA-NEXT:    addq $40, %rsp
    752 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    753 ; HAS-FMA-NEXT:    retq
    754 entry:
    755   %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
    756                              <4 x double> <double 42.0, double 42.1,
    757                                            double 42.2, double 42.3>,
    758                              metadata !"round.dynamic",
    759                              metadata !"fpexcept.strict")
    760   ret <4 x double> %sin
    761 }
    762 
    ; Lowering test for llvm.experimental.constrained.cos on the constant
    ; <2 x double> operand <42.0, 42.1> with round.dynamic / fpexcept.strict
    ; metadata.
    ; Both run configurations are expected to emit two scalar `callq cos`
    ; libcalls: the first result is spilled to (%rsp) across the second call,
    ; then the two results are recombined into xmm0 with (v)unpcklpd.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    763 define <2 x double> @constrained_vector_cos_v2f64() {
    764 ; NO-FMA-LABEL: constrained_vector_cos_v2f64:
    765 ; NO-FMA:       # %bb.0: # %entry
    766 ; NO-FMA-NEXT:    subq $24, %rsp
    767 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    768 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    769 ; NO-FMA-NEXT:    callq cos
    770 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    771 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    772 ; NO-FMA-NEXT:    callq cos
    773 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    774 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    775 ; NO-FMA-NEXT:    addq $24, %rsp
    776 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    777 ; NO-FMA-NEXT:    retq
    778 ;
    779 ; HAS-FMA-LABEL: constrained_vector_cos_v2f64:
    780 ; HAS-FMA:       # %bb.0: # %entry
    781 ; HAS-FMA-NEXT:    subq $24, %rsp
    782 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    783 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    784 ; HAS-FMA-NEXT:    callq cos
    785 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    786 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    787 ; HAS-FMA-NEXT:    callq cos
    788 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    789 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    790 ; HAS-FMA-NEXT:    addq $24, %rsp
    791 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    792 ; HAS-FMA-NEXT:    retq
    793 entry:
    794   %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
    795                              <2 x double> <double 42.0, double 42.1>,
    796                              metadata !"round.dynamic",
    797                              metadata !"fpexcept.strict")
    798   ret <2 x double> %cos
    799 }
    800 
    ; Lowering test for llvm.experimental.constrained.cos on the constant
    ; <4 x double> operand <42.0, 42.1, 42.2, 42.3> with round.dynamic /
    ; fpexcept.strict metadata.
    ; Both run configurations are expected to emit four scalar `callq cos`
    ; libcalls, spilling intermediate results to the stack between calls.
    ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
    ; with +fma the halves are merged into ymm0 by vinsertf128.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    801 define <4 x double> @constrained_vector_cos_v4f64() {
    802 ; NO-FMA-LABEL: constrained_vector_cos_v4f64:
    803 ; NO-FMA:       # %bb.0: # %entry
    804 ; NO-FMA-NEXT:    subq $40, %rsp
    805 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    806 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    807 ; NO-FMA-NEXT:    callq cos
    808 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    809 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    810 ; NO-FMA-NEXT:    callq cos
    811 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    812 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    813 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    814 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    815 ; NO-FMA-NEXT:    callq cos
    816 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    817 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    818 ; NO-FMA-NEXT:    callq cos
    819 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    820 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    821 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    822 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    823 ; NO-FMA-NEXT:    addq $40, %rsp
    824 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    825 ; NO-FMA-NEXT:    retq
    826 ;
    827 ; HAS-FMA-LABEL: constrained_vector_cos_v4f64:
    828 ; HAS-FMA:       # %bb.0: # %entry
    829 ; HAS-FMA-NEXT:    subq $40, %rsp
    830 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
    831 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    832 ; HAS-FMA-NEXT:    callq cos
    833 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    834 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    835 ; HAS-FMA-NEXT:    callq cos
    836 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    837 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    838 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    839 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    840 ; HAS-FMA-NEXT:    callq cos
    841 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    842 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    843 ; HAS-FMA-NEXT:    callq cos
    844 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
    845 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    846 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
    847 ; HAS-FMA-NEXT:    addq $40, %rsp
    848 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    849 ; HAS-FMA-NEXT:    retq
    850 entry:
    851   %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
    852                              <4 x double> <double 42.0, double 42.1,
    853                                            double 42.2, double 42.3>,
    854                              metadata !"round.dynamic",
    855                              metadata !"fpexcept.strict")
    856   ret <4 x double> %cos
    857 }
    858 
    ; Lowering test for llvm.experimental.constrained.exp on the constant
    ; <2 x double> operand <42.0, 42.1> with round.dynamic / fpexcept.strict
    ; metadata.
    ; Both run configurations are expected to emit two scalar `callq exp`
    ; libcalls: the first result is spilled to (%rsp) across the second call,
    ; then the two results are recombined into xmm0 with (v)unpcklpd.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    859 define <2 x double> @constrained_vector_exp_v2f64() {
    860 ; NO-FMA-LABEL: constrained_vector_exp_v2f64:
    861 ; NO-FMA:       # %bb.0: # %entry
    862 ; NO-FMA-NEXT:    subq $24, %rsp
    863 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    864 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    865 ; NO-FMA-NEXT:    callq exp
    866 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    867 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    868 ; NO-FMA-NEXT:    callq exp
    869 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    870 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    871 ; NO-FMA-NEXT:    addq $24, %rsp
    872 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    873 ; NO-FMA-NEXT:    retq
    874 ;
    875 ; HAS-FMA-LABEL: constrained_vector_exp_v2f64:
    876 ; HAS-FMA:       # %bb.0: # %entry
    877 ; HAS-FMA-NEXT:    subq $24, %rsp
    878 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    879 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    880 ; HAS-FMA-NEXT:    callq exp
    881 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    882 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    883 ; HAS-FMA-NEXT:    callq exp
    884 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    885 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    886 ; HAS-FMA-NEXT:    addq $24, %rsp
    887 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    888 ; HAS-FMA-NEXT:    retq
    889 entry:
    890   %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
    891                              <2 x double> <double 42.0, double 42.1>,
    892                              metadata !"round.dynamic",
    893                              metadata !"fpexcept.strict")
    894   ret <2 x double> %exp
    895 }
    896 
    ; Lowering test for llvm.experimental.constrained.exp on the constant
    ; <4 x double> operand <42.0, 42.1, 42.2, 42.3> with round.dynamic /
    ; fpexcept.strict metadata.
    ; Both run configurations are expected to emit four scalar `callq exp`
    ; libcalls, spilling intermediate results to the stack between calls.
    ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
    ; with +fma the halves are merged into ymm0 by vinsertf128.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    897 define <4 x double> @constrained_vector_exp_v4f64() {
    898 ; NO-FMA-LABEL: constrained_vector_exp_v4f64:
    899 ; NO-FMA:       # %bb.0: # %entry
    900 ; NO-FMA-NEXT:    subq $40, %rsp
    901 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    902 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    903 ; NO-FMA-NEXT:    callq exp
    904 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    905 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    906 ; NO-FMA-NEXT:    callq exp
    907 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    908 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    909 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    910 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    911 ; NO-FMA-NEXT:    callq exp
    912 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    913 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    914 ; NO-FMA-NEXT:    callq exp
    915 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
    916 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
    917 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
    918 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
    919 ; NO-FMA-NEXT:    addq $40, %rsp
    920 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    921 ; NO-FMA-NEXT:    retq
    922 ;
    923 ; HAS-FMA-LABEL: constrained_vector_exp_v4f64:
    924 ; HAS-FMA:       # %bb.0: # %entry
    925 ; HAS-FMA-NEXT:    subq $40, %rsp
    926 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
    927 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    928 ; HAS-FMA-NEXT:    callq exp
    929 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    930 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    931 ; HAS-FMA-NEXT:    callq exp
    932 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    933 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    934 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    935 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    936 ; HAS-FMA-NEXT:    callq exp
    937 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
    938 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    939 ; HAS-FMA-NEXT:    callq exp
    940 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
    941 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    942 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
    943 ; HAS-FMA-NEXT:    addq $40, %rsp
    944 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    945 ; HAS-FMA-NEXT:    retq
    946 entry:
    947   %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
    948                              <4 x double> <double 42.0, double 42.1,
    949                                            double 42.2, double 42.3>,
    950                              metadata !"round.dynamic",
    951                              metadata !"fpexcept.strict")
    952   ret <4 x double> %exp
    953 }
    954 
    ; Lowering test for llvm.experimental.constrained.exp2 on the constant
    ; <2 x double> operand <42.1, 42.0> with round.dynamic / fpexcept.strict
    ; metadata.
    ; Both run configurations are expected to emit two scalar `callq exp2`
    ; libcalls: the first result is spilled to (%rsp) across the second call,
    ; then the two results are recombined into xmm0 with (v)unpcklpd.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    955 define <2 x double> @constrained_vector_exp2_v2f64() {
    956 ; NO-FMA-LABEL: constrained_vector_exp2_v2f64:
    957 ; NO-FMA:       # %bb.0: # %entry
    958 ; NO-FMA-NEXT:    subq $24, %rsp
    959 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
    960 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    961 ; NO-FMA-NEXT:    callq exp2
    962 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
    963 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    964 ; NO-FMA-NEXT:    callq exp2
    965 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
    966 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    967 ; NO-FMA-NEXT:    addq $24, %rsp
    968 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
    969 ; NO-FMA-NEXT:    retq
    970 ;
    971 ; HAS-FMA-LABEL: constrained_vector_exp2_v2f64:
    972 ; HAS-FMA:       # %bb.0: # %entry
    973 ; HAS-FMA-NEXT:    subq $24, %rsp
    974 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
    975 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    976 ; HAS-FMA-NEXT:    callq exp2
    977 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
    978 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    979 ; HAS-FMA-NEXT:    callq exp2
    980 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
    981 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
    982 ; HAS-FMA-NEXT:    addq $24, %rsp
    983 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
    984 ; HAS-FMA-NEXT:    retq
    985 entry:
    986   %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
    987                               <2 x double> <double 42.1, double 42.0>,
    988                               metadata !"round.dynamic",
    989                               metadata !"fpexcept.strict")
    990   ret <2 x double> %exp2
    991 }
    992 
    ; Lowering test for llvm.experimental.constrained.exp2 on the constant
    ; <4 x double> operand <42.1, 42.2, 42.3, 42.4> with round.dynamic /
    ; fpexcept.strict metadata.
    ; Both run configurations are expected to emit four scalar `callq exp2`
    ; libcalls, spilling intermediate results to the stack between calls.
    ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
    ; with +fma the halves are merged into ymm0 by vinsertf128.
    ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
    ; regenerate them instead of editing them by hand.
    993 define <4 x double> @constrained_vector_exp2_v4f64() {
    994 ; NO-FMA-LABEL: constrained_vector_exp2_v4f64:
    995 ; NO-FMA:       # %bb.0: # %entry
    996 ; NO-FMA-NEXT:    subq $40, %rsp
    997 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
    998 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    999 ; NO-FMA-NEXT:    callq exp2
   1000 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1001 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1002 ; NO-FMA-NEXT:    callq exp2
   1003 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1004 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1005 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1006 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1007 ; NO-FMA-NEXT:    callq exp2
   1008 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1009 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1010 ; NO-FMA-NEXT:    callq exp2
   1011 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1012 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1013 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1014 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1015 ; NO-FMA-NEXT:    addq $40, %rsp
   1016 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1017 ; NO-FMA-NEXT:    retq
   1018 ;
   1019 ; HAS-FMA-LABEL: constrained_vector_exp2_v4f64:
   1020 ; HAS-FMA:       # %bb.0: # %entry
   1021 ; HAS-FMA-NEXT:    subq $40, %rsp
   1022 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
   1023 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1024 ; HAS-FMA-NEXT:    callq exp2
   1025 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1026 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1027 ; HAS-FMA-NEXT:    callq exp2
   1028 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1029 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1030 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1031 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1032 ; HAS-FMA-NEXT:    callq exp2
   1033 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1034 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1035 ; HAS-FMA-NEXT:    callq exp2
   1036 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
   1037 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1038 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
   1039 ; HAS-FMA-NEXT:    addq $40, %rsp
   1040 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1041 ; HAS-FMA-NEXT:    retq
   1042 entry:
   1043   %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
   1044                               <4 x double> <double 42.1, double 42.2,
   1045                                             double 42.3, double 42.4>,
   1046                               metadata !"round.dynamic",
   1047                               metadata !"fpexcept.strict")
   1048   ret <4 x double> %exp2
   1049 }
   1050 
   ; Lowering test for llvm.experimental.constrained.log on the constant
   ; <2 x double> operand <42.0, 42.1> with round.dynamic / fpexcept.strict
   ; metadata.
   ; Both run configurations are expected to emit two scalar `callq log`
   ; libcalls: the first result is spilled to (%rsp) across the second call,
   ; then the two results are recombined into xmm0 with (v)unpcklpd.
   ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
   ; regenerate them instead of editing them by hand.
   1051 define <2 x double> @constrained_vector_log_v2f64() {
   1052 ; NO-FMA-LABEL: constrained_vector_log_v2f64:
   1053 ; NO-FMA:       # %bb.0: # %entry
   1054 ; NO-FMA-NEXT:    subq $24, %rsp
   1055 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
   1056 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1057 ; NO-FMA-NEXT:    callq log
   1058 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1059 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1060 ; NO-FMA-NEXT:    callq log
   1061 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1062 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1063 ; NO-FMA-NEXT:    addq $24, %rsp
   1064 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1065 ; NO-FMA-NEXT:    retq
   1066 ;
   1067 ; HAS-FMA-LABEL: constrained_vector_log_v2f64:
   1068 ; HAS-FMA:       # %bb.0: # %entry
   1069 ; HAS-FMA-NEXT:    subq $24, %rsp
   1070 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
   1071 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1072 ; HAS-FMA-NEXT:    callq log
   1073 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1074 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1075 ; HAS-FMA-NEXT:    callq log
   1076 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1077 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1078 ; HAS-FMA-NEXT:    addq $24, %rsp
   1079 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1080 ; HAS-FMA-NEXT:    retq
   1081 entry:
   1082   %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
   1083                              <2 x double> <double 42.0, double 42.1>,
   1084                              metadata !"round.dynamic",
   1085                              metadata !"fpexcept.strict")
   1086   ret <2 x double> %log
   1087 }
   1088 
   ; Lowering test for llvm.experimental.constrained.log on the constant
   ; <4 x double> operand <42.0, 42.1, 42.2, 42.3> with round.dynamic /
   ; fpexcept.strict metadata.
   ; Both run configurations are expected to emit four scalar `callq log`
   ; libcalls, spilling intermediate results to the stack between calls.
   ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
   ; with +fma the halves are merged into ymm0 by vinsertf128.
   ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
   ; regenerate them instead of editing them by hand.
   1089 define <4 x double> @constrained_vector_log_v4f64() {
   1090 ; NO-FMA-LABEL: constrained_vector_log_v4f64:
   1091 ; NO-FMA:       # %bb.0: # %entry
   1092 ; NO-FMA-NEXT:    subq $40, %rsp
   1093 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
   1094 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1095 ; NO-FMA-NEXT:    callq log
   1096 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1097 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1098 ; NO-FMA-NEXT:    callq log
   1099 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1100 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1101 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1102 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1103 ; NO-FMA-NEXT:    callq log
   1104 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1105 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1106 ; NO-FMA-NEXT:    callq log
   1107 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1108 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1109 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1110 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1111 ; NO-FMA-NEXT:    addq $40, %rsp
   1112 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1113 ; NO-FMA-NEXT:    retq
   1114 ;
   1115 ; HAS-FMA-LABEL: constrained_vector_log_v4f64:
   1116 ; HAS-FMA:       # %bb.0: # %entry
   1117 ; HAS-FMA-NEXT:    subq $40, %rsp
   1118 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
   1119 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1120 ; HAS-FMA-NEXT:    callq log
   1121 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1122 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1123 ; HAS-FMA-NEXT:    callq log
   1124 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1125 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1126 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1127 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1128 ; HAS-FMA-NEXT:    callq log
   1129 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1130 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1131 ; HAS-FMA-NEXT:    callq log
   1132 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
   1133 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1134 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
   1135 ; HAS-FMA-NEXT:    addq $40, %rsp
   1136 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1137 ; HAS-FMA-NEXT:    retq
   1138 entry:
   1139   %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
   1140                              <4 x double> <double 42.0, double 42.1,
   1141                                            double 42.2, double 42.3>,
   1142                              metadata !"round.dynamic",
   1143                              metadata !"fpexcept.strict")
   1144   ret <4 x double> %log
   1145 }
   1146 
   ; Lowering test for llvm.experimental.constrained.log10 on the constant
   ; <2 x double> operand <42.0, 42.1> with round.dynamic / fpexcept.strict
   ; metadata.
   ; Both run configurations are expected to emit two scalar `callq log10`
   ; libcalls: the first result is spilled to (%rsp) across the second call,
   ; then the two results are recombined into xmm0 with (v)unpcklpd.
   ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
   ; regenerate them instead of editing them by hand.
   1147 define <2 x double> @constrained_vector_log10_v2f64() {
   1148 ; NO-FMA-LABEL: constrained_vector_log10_v2f64:
   1149 ; NO-FMA:       # %bb.0: # %entry
   1150 ; NO-FMA-NEXT:    subq $24, %rsp
   1151 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
   1152 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1153 ; NO-FMA-NEXT:    callq log10
   1154 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1155 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1156 ; NO-FMA-NEXT:    callq log10
   1157 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1158 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1159 ; NO-FMA-NEXT:    addq $24, %rsp
   1160 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1161 ; NO-FMA-NEXT:    retq
   1162 ;
   1163 ; HAS-FMA-LABEL: constrained_vector_log10_v2f64:
   1164 ; HAS-FMA:       # %bb.0: # %entry
   1165 ; HAS-FMA-NEXT:    subq $24, %rsp
   1166 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
   1167 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1168 ; HAS-FMA-NEXT:    callq log10
   1169 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1170 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1171 ; HAS-FMA-NEXT:    callq log10
   1172 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1173 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1174 ; HAS-FMA-NEXT:    addq $24, %rsp
   1175 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1176 ; HAS-FMA-NEXT:    retq
   1177 entry:
   1178   %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
   1179                                <2 x double> <double 42.0, double 42.1>,
   1180                                metadata !"round.dynamic",
   1181                                metadata !"fpexcept.strict")
   1182   ret <2 x double> %log10
   1183 }
   1184 
   ; Lowering test for llvm.experimental.constrained.log10 on the constant
   ; <4 x double> operand <42.0, 42.1, 42.2, 42.3> with round.dynamic /
   ; fpexcept.strict metadata.
   ; Both run configurations are expected to emit four scalar `callq log10`
   ; libcalls, spilling intermediate results to the stack between calls.
   ; Without +fma the result comes back as two 128-bit halves in xmm0 and xmm1;
   ; with +fma the halves are merged into ymm0 by vinsertf128.
   ; The assertions below are autogenerated by utils/update_llc_test_checks.py;
   ; regenerate them instead of editing them by hand.
   1185 define <4 x double> @constrained_vector_log10_v4f64() {
   1186 ; NO-FMA-LABEL: constrained_vector_log10_v4f64:
   1187 ; NO-FMA:       # %bb.0: # %entry
   1188 ; NO-FMA-NEXT:    subq $40, %rsp
   1189 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
   1190 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1191 ; NO-FMA-NEXT:    callq log10
   1192 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1193 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1194 ; NO-FMA-NEXT:    callq log10
   1195 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1196 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1197 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1198 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1199 ; NO-FMA-NEXT:    callq log10
   1200 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1201 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1202 ; NO-FMA-NEXT:    callq log10
   1203 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1204 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1205 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1206 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1207 ; NO-FMA-NEXT:    addq $40, %rsp
   1208 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1209 ; NO-FMA-NEXT:    retq
   1210 ;
   1211 ; HAS-FMA-LABEL: constrained_vector_log10_v4f64:
   1212 ; HAS-FMA:       # %bb.0: # %entry
   1213 ; HAS-FMA-NEXT:    subq $40, %rsp
   1214 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
   1215 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1216 ; HAS-FMA-NEXT:    callq log10
   1217 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1218 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1219 ; HAS-FMA-NEXT:    callq log10
   1220 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1221 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1222 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1223 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1224 ; HAS-FMA-NEXT:    callq log10
   1225 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1226 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1227 ; HAS-FMA-NEXT:    callq log10
   1228 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
   1229 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1230 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
   1231 ; HAS-FMA-NEXT:    addq $40, %rsp
   1232 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1233 ; HAS-FMA-NEXT:    retq
   1234 entry:
   1235   %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
   1236                                <4 x double> <double 42.0, double 42.1,
   1237                                              double 42.2, double 42.3>,
   1238                                metadata !"round.dynamic",
   1239                                metadata !"fpexcept.strict")
   1240   ret <4 x double> %log10
   1241 }
   1242 
   1243 define <2 x double> @constrained_vector_log2_v2f64() {
        ; Strict-FP log2 of a constant <2 x double>, called with "round.dynamic" and
        ; "fpexcept.strict" metadata.
        ; Both check groups expect the operation to be scalarized: two calls to the
        ; log2 library function, the first result spilled to the stack across the
        ; second call, then the pair recombined with (v)unpcklpd into xmm0.
        ; The +fma run differs only in using the VEX-encoded instruction forms.
   1244 ; NO-FMA-LABEL: constrained_vector_log2_v2f64:
   1245 ; NO-FMA:       # %bb.0: # %entry
   1246 ; NO-FMA-NEXT:    subq $24, %rsp
   1247 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
   1248 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1249 ; NO-FMA-NEXT:    callq log2
   1250 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1251 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1252 ; NO-FMA-NEXT:    callq log2
   1253 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1254 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1255 ; NO-FMA-NEXT:    addq $24, %rsp
   1256 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1257 ; NO-FMA-NEXT:    retq
   1258 ;
   1259 ; HAS-FMA-LABEL: constrained_vector_log2_v2f64:
   1260 ; HAS-FMA:       # %bb.0: # %entry
   1261 ; HAS-FMA-NEXT:    subq $24, %rsp
   1262 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 32
   1263 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1264 ; HAS-FMA-NEXT:    callq log2
   1265 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1266 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1267 ; HAS-FMA-NEXT:    callq log2
   1268 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1269 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1270 ; HAS-FMA-NEXT:    addq $24, %rsp
   1271 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1272 ; HAS-FMA-NEXT:    retq
   1273 entry:
   1274   %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
   1275                               <2 x double> <double 42.0, double 42.1>,
   1276                               metadata !"round.dynamic",
   1277                               metadata !"fpexcept.strict")
   1278   ret <2 x double> %log2
   1279 }
   1280 
   1281 define <4 x double> @constrained_vector_log2_v4f64() {
        ; Strict-FP log2 of a constant <4 x double>, called with "round.dynamic" and
        ; "fpexcept.strict" metadata.
        ; Both check groups expect four scalar calls to the log2 library function,
        ; with intermediate results spilled to the stack between calls.
        ; Without +fma the result is returned as two 128-bit halves in xmm0 and xmm1;
        ; with +fma the two halves are merged into ymm0 via vinsertf128.
   1282 ; NO-FMA-LABEL: constrained_vector_log2_v4f64:
   1283 ; NO-FMA:       # %bb.0: # %entry
   1284 ; NO-FMA-NEXT:    subq $40, %rsp
   1285 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
   1286 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1287 ; NO-FMA-NEXT:    callq log2
   1288 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1289 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1290 ; NO-FMA-NEXT:    callq log2
   1291 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1292 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1293 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1294 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1295 ; NO-FMA-NEXT:    callq log2
   1296 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1297 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1298 ; NO-FMA-NEXT:    callq log2
   1299 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1300 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1301 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1302 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1303 ; NO-FMA-NEXT:    addq $40, %rsp
   1304 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1305 ; NO-FMA-NEXT:    retq
   1306 ;
   1307 ; HAS-FMA-LABEL: constrained_vector_log2_v4f64:
   1308 ; HAS-FMA:       # %bb.0: # %entry
   1309 ; HAS-FMA-NEXT:    subq $40, %rsp
   1310 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 48
   1311 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1312 ; HAS-FMA-NEXT:    callq log2
   1313 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1314 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1315 ; HAS-FMA-NEXT:    callq log2
   1316 ; HAS-FMA-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
   1317 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1318 ; HAS-FMA-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
   1319 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1320 ; HAS-FMA-NEXT:    callq log2
   1321 ; HAS-FMA-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1322 ; HAS-FMA-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1323 ; HAS-FMA-NEXT:    callq log2
   1324 ; HAS-FMA-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
   1325 ; HAS-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1326 ; HAS-FMA-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
   1327 ; HAS-FMA-NEXT:    addq $40, %rsp
   1328 ; HAS-FMA-NEXT:    .cfi_def_cfa_offset 8
   1329 ; HAS-FMA-NEXT:    retq
   1330 entry:
   1331   %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
   1332                               <4 x double> <double 42.0, double 42.1,
   1333                                             double 42.2, double 42.3>,
   1334                               metadata !"round.dynamic",
   1335                               metadata !"fpexcept.strict")
   1336   ret <4 x double> %log2
   1337 }
   1338 
   1339 define <2 x double> @constrained_vector_rint_v2f64() {
        ; Strict-FP rint of a constant <2 x double>, called with "round.dynamic" and
        ; "fpexcept.strict" metadata.
        ; Without +fma the checks expect scalarization into two calls to the rint
        ; library function, recombined with unpcklpd.
        ; With +fma the checks expect a single vroundpd $4 reading the constant from
        ; a rip-relative address into xmm0.
        ; NOTE(review): immediate 4 presumably selects the current MXCSR rounding
        ; mode - confirm against the ROUNDPD immediate encoding.
   1340 ; NO-FMA-LABEL: constrained_vector_rint_v2f64:
   1341 ; NO-FMA:       # %bb.0: # %entry
   1342 ; NO-FMA-NEXT:    subq $24, %rsp
   1343 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
   1344 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1345 ; NO-FMA-NEXT:    callq rint
   1346 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1347 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1348 ; NO-FMA-NEXT:    callq rint
   1349 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1350 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1351 ; NO-FMA-NEXT:    addq $24, %rsp
   1352 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1353 ; NO-FMA-NEXT:    retq
   1354 ;
   1355 ; HAS-FMA-LABEL: constrained_vector_rint_v2f64:
   1356 ; HAS-FMA:       # %bb.0: # %entry
   1357 ; HAS-FMA-NEXT:    vroundpd $4, {{.*}}(%rip), %xmm0
   1358 ; HAS-FMA-NEXT:    retq
   1359 entry:
   1360   %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
   1361                         <2 x double> <double 42.1, double 42.0>,
   1362                         metadata !"round.dynamic",
   1363                         metadata !"fpexcept.strict")
   1364   ret <2 x double> %rint
   1365 }
   1366 
   1367 define <4 x double> @constrained_vector_rint_v4f64() {
        ; Strict-FP rint of a constant <4 x double>, called with "round.dynamic" and
        ; "fpexcept.strict" metadata.
        ; Without +fma the checks expect four scalar calls to the rint library
        ; function, with the result returned as two 128-bit halves in xmm0 and xmm1.
        ; With +fma the checks expect a single 256-bit vroundpd $4 reading the
        ; constant from a rip-relative address into ymm0.
        ; NOTE(review): immediate 4 presumably selects the current MXCSR rounding
        ; mode - confirm against the ROUNDPD immediate encoding.
   1368 ; NO-FMA-LABEL: constrained_vector_rint_v4f64:
   1369 ; NO-FMA:       # %bb.0: # %entry
   1370 ; NO-FMA-NEXT:    subq $40, %rsp
   1371 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
   1372 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1373 ; NO-FMA-NEXT:    callq rint
   1374 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1375 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1376 ; NO-FMA-NEXT:    callq rint
   1377 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1378 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1379 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1380 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1381 ; NO-FMA-NEXT:    callq rint
   1382 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1383 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1384 ; NO-FMA-NEXT:    callq rint
   1385 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1386 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1387 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1388 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1389 ; NO-FMA-NEXT:    addq $40, %rsp
   1390 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1391 ; NO-FMA-NEXT:    retq
   1392 ;
   1393 ; HAS-FMA-LABEL: constrained_vector_rint_v4f64:
   1394 ; HAS-FMA:       # %bb.0: # %entry
   1395 ; HAS-FMA-NEXT:    vroundpd $4, {{.*}}(%rip), %ymm0
   1396 ; HAS-FMA-NEXT:    retq
   1397 entry:
   1398   %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
   1399                         <4 x double> <double 42.1, double 42.2,
   1400                                       double 42.3, double 42.4>,
   1401                         metadata !"round.dynamic",
   1402                         metadata !"fpexcept.strict")
   1403   ret <4 x double> %rint
   1404 }
   1405 
   1406 define <2 x double> @constrained_vector_nearbyint_v2f64() {
        ; Strict-FP nearbyint of a constant <2 x double>, called with "round.dynamic"
        ; and "fpexcept.strict" metadata.
        ; Without +fma the checks expect scalarization into two calls to the
        ; nearbyint library function, recombined with unpcklpd.
        ; With +fma the checks expect a single vroundpd $12 reading the constant from
        ; a rip-relative address into xmm0.
        ; NOTE(review): immediate 12 presumably means "current MXCSR rounding mode,
        ; precision exception suppressed" - confirm against the ROUNDPD encoding.
   1407 ; NO-FMA-LABEL: constrained_vector_nearbyint_v2f64:
   1408 ; NO-FMA:       # %bb.0: # %entry
   1409 ; NO-FMA-NEXT:    subq $24, %rsp
   1410 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 32
   1411 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1412 ; NO-FMA-NEXT:    callq nearbyint
   1413 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1414 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1415 ; NO-FMA-NEXT:    callq nearbyint
   1416 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1417 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1418 ; NO-FMA-NEXT:    addq $24, %rsp
   1419 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1420 ; NO-FMA-NEXT:    retq
   1421 ;
   1422 ; HAS-FMA-LABEL: constrained_vector_nearbyint_v2f64:
   1423 ; HAS-FMA:       # %bb.0: # %entry
   1424 ; HAS-FMA-NEXT:    vroundpd $12, {{.*}}(%rip), %xmm0
   1425 ; HAS-FMA-NEXT:    retq
   1426 entry:
   1427   %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
   1428                                 <2 x double> <double 42.1, double 42.0>,
   1429                                 metadata !"round.dynamic",
   1430                                 metadata !"fpexcept.strict")
   1431   ret <2 x double> %nearby
   1432 }
   1433 
   1434 define <4 x double> @constrained_vector_nearbyint_v4f64() {
        ; Strict-FP nearbyint of a constant <4 x double>, called with "round.dynamic"
        ; and "fpexcept.strict" metadata.
        ; Without +fma the checks expect four scalar calls to the nearbyint library
        ; function, with the result returned as two 128-bit halves in xmm0 and xmm1.
        ; With +fma the checks expect a single 256-bit vroundpd $12 reading the
        ; constant from a rip-relative address into ymm0.
        ; NOTE(review): immediate 12 presumably means "current MXCSR rounding mode,
        ; precision exception suppressed" - confirm against the ROUNDPD encoding.
   1435 ; NO-FMA-LABEL: constrained_vector_nearbyint_v4f64:
   1436 ; NO-FMA:       # %bb.0: # %entry
   1437 ; NO-FMA-NEXT:    subq $40, %rsp
   1438 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 48
   1439 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1440 ; NO-FMA-NEXT:    callq nearbyint
   1441 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1442 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1443 ; NO-FMA-NEXT:    callq nearbyint
   1444 ; NO-FMA-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
   1445 ; NO-FMA-NEXT:    # xmm0 = xmm0[0],mem[0]
   1446 ; NO-FMA-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
   1447 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1448 ; NO-FMA-NEXT:    callq nearbyint
   1449 ; NO-FMA-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
   1450 ; NO-FMA-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1451 ; NO-FMA-NEXT:    callq nearbyint
   1452 ; NO-FMA-NEXT:    movaps %xmm0, %xmm1
   1453 ; NO-FMA-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
   1454 ; NO-FMA-NEXT:    # xmm1 = xmm1[0],mem[0]
   1455 ; NO-FMA-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
   1456 ; NO-FMA-NEXT:    addq $40, %rsp
   1457 ; NO-FMA-NEXT:    .cfi_def_cfa_offset 8
   1458 ; NO-FMA-NEXT:    retq
   1459 ;
   1460 ; HAS-FMA-LABEL: constrained_vector_nearbyint_v4f64:
   1461 ; HAS-FMA:       # %bb.0: # %entry
   1462 ; HAS-FMA-NEXT:    vroundpd $12, {{.*}}(%rip), %ymm0
   1463 ; HAS-FMA-NEXT:    retq
   1464 entry:
   1465   %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
   1466                                 <4 x double> <double 42.1, double 42.2,
   1467                                               double 42.3, double 42.4>,
   1468                                 metadata !"round.dynamic",
   1469                                 metadata !"fpexcept.strict")
   1470   ret <4 x double> %nearby
   1471 }
   1472 
   1473 ; Single width declarations: constrained intrinsics on 128-bit vectors (<2 x double>, <4 x float>).
   1474 declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
   1475 declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
   1476 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
   1477 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
   1478 declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
   1479 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
   1480 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
   1481 declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
   1482 declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
   1483 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
   1484 declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
   1485 declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
   1486 declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
   1487 declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
   1488 declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
   1489 declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
   1490 declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
   1491 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
   1492 
   1493 ; Double width declarations: constrained intrinsics on 256-bit vectors (<4 x double>, <8 x float>).
   1494 declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
   1495 declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
   1496 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
   1497 declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
   1498 declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
   1499 declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
   1500 declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
   1501 declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
   1502 declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
   1503 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
   1504 declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
   1505 declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
   1506 declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
   1507 declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
   1508 declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
   1509 declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
   1510 declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
   1511 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
   1512