Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE
      4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
      5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=prescott | FileCheck %s --check-prefix=SSE3
      6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
      7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -mcpu=sandybridge | FileCheck %s --check-prefix=AVX
      8 
      9 define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
        ; @_Z1fe (Itanium mangling of f(long double)) takes one x86_fp80 value and
        ; writes three doubles into a local [3 x double] buffer:
        ;   tx[0] = (double)(i32)z
        ;   tx[1] = (double)(i32)((z - tx[0]) * K)
        ;   tx[2] = (double)(((z - tx[0]) * K - tx[1]) * K)
        ; where K is the x86_fp80 constant 0xK40178000000000000000 and all
        ; subtractions/multiplications are carried out in x86_fp80.
        ;
        ; The assertion groups below are autogenerated (see the NOTE on the first
        ; line of the test); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ;
        ; First group (+sse2 only): each fptosi is expected as
        ; fnstcw / movw $0xC7F / fldcw / fistl / fldcw, i.e. the x87 control word is
        ; saved, replaced, used for the integer store, and then restored.
        ; NOTE(review): 0xC7F has both rounding-control bits set (truncate) per the
        ; x87 control-word layout - confirm against the Intel SDM if this matters.
     10 ; SSE2-LABEL: _Z1fe:
     11 ; SSE2:       ## %bb.0: ## %entry
     12 ; SSE2-NEXT:    pushq %rbp
     13 ; SSE2-NEXT:    .cfi_def_cfa_offset 16
     14 ; SSE2-NEXT:    .cfi_offset %rbp, -16
     15 ; SSE2-NEXT:    movq %rsp, %rbp
     16 ; SSE2-NEXT:    .cfi_def_cfa_register %rbp
     17 ; SSE2-NEXT:    fldt 16(%rbp)
     18 ; SSE2-NEXT:    fnstcw -4(%rbp)
     19 ; SSE2-NEXT:    movzwl -4(%rbp), %eax
     20 ; SSE2-NEXT:    movw $3199, -4(%rbp) ## imm = 0xC7F
     21 ; SSE2-NEXT:    fldcw -4(%rbp)
     22 ; SSE2-NEXT:    movw %ax, -4(%rbp)
     23 ; SSE2-NEXT:    fistl -8(%rbp)
     24 ; SSE2-NEXT:    fldcw -4(%rbp)
     25 ; SSE2-NEXT:    cvtsi2sdl -8(%rbp), %xmm0
     26 ; SSE2-NEXT:    movsd %xmm0, -64(%rbp)
     27 ; SSE2-NEXT:    movsd %xmm0, -32(%rbp)
     28 ; SSE2-NEXT:    fsubl -32(%rbp)
     29 ; SSE2-NEXT:    flds {{.*}}(%rip)
     30 ; SSE2-NEXT:    fmul %st(0), %st(1)
     31 ; SSE2-NEXT:    fnstcw -2(%rbp)
     32 ; SSE2-NEXT:    movzwl -2(%rbp), %eax
     33 ; SSE2-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
     34 ; SSE2-NEXT:    fldcw -2(%rbp)
     35 ; SSE2-NEXT:    movw %ax, -2(%rbp)
     36 ; SSE2-NEXT:    fxch %st(1)
     37 ; SSE2-NEXT:    fistl -12(%rbp)
     38 ; SSE2-NEXT:    fldcw -2(%rbp)
     39 ; SSE2-NEXT:    xorps %xmm0, %xmm0
     40 ; SSE2-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
     41 ; SSE2-NEXT:    movsd %xmm0, -56(%rbp)
     42 ; SSE2-NEXT:    movsd %xmm0, -24(%rbp)
     43 ; SSE2-NEXT:    fsubl -24(%rbp)
     44 ; SSE2-NEXT:    fmulp %st(1)
     45 ; SSE2-NEXT:    fstpl -48(%rbp)
     46 ; SSE2-NEXT:    popq %rbp
     47 ; SSE2-NEXT:    retq
     48 ;
        ; Second group (+sse2 with -mcpu=x86-64): the expected instructions appear
        ; to be the same set as in the first group, only in a different order
        ; (for example, fldt is issued after the first fldcw, and flds/fmul are
        ; interleaved with the second control-word sequence).
     49 ; SSE2-SCHEDULE-LABEL: _Z1fe:
     50 ; SSE2-SCHEDULE:       ## %bb.0: ## %entry
     51 ; SSE2-SCHEDULE-NEXT:    pushq %rbp
     52 ; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_offset 16
     53 ; SSE2-SCHEDULE-NEXT:    .cfi_offset %rbp, -16
     54 ; SSE2-SCHEDULE-NEXT:    movq %rsp, %rbp
     55 ; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_register %rbp
     56 ; SSE2-SCHEDULE-NEXT:    fnstcw -4(%rbp)
     57 ; SSE2-SCHEDULE-NEXT:    movzwl -4(%rbp), %eax
     58 ; SSE2-SCHEDULE-NEXT:    movw $3199, -4(%rbp) ## imm = 0xC7F
     59 ; SSE2-SCHEDULE-NEXT:    fldcw -4(%rbp)
     60 ; SSE2-SCHEDULE-NEXT:    fldt 16(%rbp)
     61 ; SSE2-SCHEDULE-NEXT:    movw %ax, -4(%rbp)
     62 ; SSE2-SCHEDULE-NEXT:    fistl -8(%rbp)
     63 ; SSE2-SCHEDULE-NEXT:    fldcw -4(%rbp)
     64 ; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -8(%rbp), %xmm0
     65 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -64(%rbp)
     66 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -32(%rbp)
     67 ; SSE2-SCHEDULE-NEXT:    fsubl -32(%rbp)
     68 ; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
     69 ; SSE2-SCHEDULE-NEXT:    flds {{.*}}(%rip)
     70 ; SSE2-SCHEDULE-NEXT:    movzwl -2(%rbp), %eax
     71 ; SSE2-SCHEDULE-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
     72 ; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
     73 ; SSE2-SCHEDULE-NEXT:    fmul %st(0), %st(1)
     74 ; SSE2-SCHEDULE-NEXT:    movw %ax, -2(%rbp)
     75 ; SSE2-SCHEDULE-NEXT:    fxch %st(1)
     76 ; SSE2-SCHEDULE-NEXT:    fistl -12(%rbp)
     77 ; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
     78 ; SSE2-SCHEDULE-NEXT:    xorps %xmm0, %xmm0
     79 ; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
     80 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -56(%rbp)
     81 ; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -24(%rbp)
     82 ; SSE2-SCHEDULE-NEXT:    fsubl -24(%rbp)
     83 ; SSE2-SCHEDULE-NEXT:    fmulp %st(1)
     84 ; SSE2-SCHEDULE-NEXT:    fstpl -48(%rbp)
     85 ; SSE2-SCHEDULE-NEXT:    popq %rbp
     86 ; SSE2-SCHEDULE-NEXT:    retq
     87 ;
        ; Third group (+sse3, with and without -mcpu=prescott): each conversion is
        ; expected as fld %st(N) followed by fisttpl, with no fnstcw/fldcw
        ; control-word sequence.
     88 ; SSE3-LABEL: _Z1fe:
     89 ; SSE3:       ## %bb.0: ## %entry
     90 ; SSE3-NEXT:    pushq %rbp
     91 ; SSE3-NEXT:    .cfi_def_cfa_offset 16
     92 ; SSE3-NEXT:    .cfi_offset %rbp, -16
     93 ; SSE3-NEXT:    movq %rsp, %rbp
     94 ; SSE3-NEXT:    .cfi_def_cfa_register %rbp
     95 ; SSE3-NEXT:    fldt 16(%rbp)
     96 ; SSE3-NEXT:    fld %st(0)
     97 ; SSE3-NEXT:    fisttpl -4(%rbp)
     98 ; SSE3-NEXT:    cvtsi2sdl -4(%rbp), %xmm0
     99 ; SSE3-NEXT:    movsd %xmm0, -48(%rbp)
    100 ; SSE3-NEXT:    movsd %xmm0, -24(%rbp)
    101 ; SSE3-NEXT:    fsubl -24(%rbp)
    102 ; SSE3-NEXT:    flds {{.*}}(%rip)
    103 ; SSE3-NEXT:    fmul %st(0), %st(1)
    104 ; SSE3-NEXT:    fld %st(1)
    105 ; SSE3-NEXT:    fisttpl -8(%rbp)
    106 ; SSE3-NEXT:    xorps %xmm0, %xmm0
    107 ; SSE3-NEXT:    cvtsi2sdl -8(%rbp), %xmm0
    108 ; SSE3-NEXT:    movsd %xmm0, -40(%rbp)
    109 ; SSE3-NEXT:    movsd %xmm0, -16(%rbp)
    110 ; SSE3-NEXT:    fxch %st(1)
    111 ; SSE3-NEXT:    fsubl -16(%rbp)
    112 ; SSE3-NEXT:    fmulp %st(1)
    113 ; SSE3-NEXT:    fstpl -32(%rbp)
    114 ; SSE3-NEXT:    popq %rbp
    115 ; SSE3-NEXT:    retq
    116 ;
        ; Fourth group (+avx, with and without -mcpu=sandybridge): same shape as the
        ; third group but with the VEX forms vcvtsi2sdl / vmovsd; no xorps is
        ; expected before the second conversion, which instead names %xmm1 as its
        ; extra source operand.
    117 ; AVX-LABEL: _Z1fe:
    118 ; AVX:       ## %bb.0: ## %entry
    119 ; AVX-NEXT:    pushq %rbp
    120 ; AVX-NEXT:    .cfi_def_cfa_offset 16
    121 ; AVX-NEXT:    .cfi_offset %rbp, -16
    122 ; AVX-NEXT:    movq %rsp, %rbp
    123 ; AVX-NEXT:    .cfi_def_cfa_register %rbp
    124 ; AVX-NEXT:    fldt 16(%rbp)
    125 ; AVX-NEXT:    fld %st(0)
    126 ; AVX-NEXT:    fisttpl -4(%rbp)
    127 ; AVX-NEXT:    vcvtsi2sdl -4(%rbp), %xmm0, %xmm0
    128 ; AVX-NEXT:    vmovsd %xmm0, -48(%rbp)
    129 ; AVX-NEXT:    vmovsd %xmm0, -24(%rbp)
    130 ; AVX-NEXT:    fsubl -24(%rbp)
    131 ; AVX-NEXT:    flds {{.*}}(%rip)
    132 ; AVX-NEXT:    fmul %st(0), %st(1)
    133 ; AVX-NEXT:    fld %st(1)
    134 ; AVX-NEXT:    fisttpl -8(%rbp)
    135 ; AVX-NEXT:    vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
    136 ; AVX-NEXT:    vmovsd %xmm0, -40(%rbp)
    137 ; AVX-NEXT:    vmovsd %xmm0, -16(%rbp)
    138 ; AVX-NEXT:    fxch %st(1)
    139 ; AVX-NEXT:    fsubl -16(%rbp)
    140 ; AVX-NEXT:    fmulp %st(1)
    141 ; AVX-NEXT:    fstpl -32(%rbp)
    142 ; AVX-NEXT:    popq %rbp
    143 ; AVX-NEXT:    retq
    144 entry:
          ; Result buffer of three doubles; this function only stores to it and
          ; never loads from it.
    145   %tx = alloca [3 x double], align 16
          ; %0 has no users in this function.
          ; NOTE(review): possibly left over from removed intrinsic calls - confirm
          ; before deleting, since removing it would require regenerating the checks.
    146   %0 = bitcast [3 x double]* %tx to i8*
          ; Step 0: convert %z to i32 (fptosi), widen that integer to double, and
          ; store it as tx[0].
    147   %conv = fptosi x86_fp80 %z to i32
    148   %conv1 = sitofp i32 %conv to double
    149   %arrayidx = getelementptr inbounds [3 x double], [3 x double]* %tx, i64 0, i64 0
    150   store double %conv1, double* %arrayidx, align 16
          ; Remainder after removing the integer part, scaled by the constant
          ; 0xK40178000000000000000 (x86_fp80 encoding with biased exponent 0x4017
          ; and significand 0x8000000000000000, i.e. 2^24).
    151   %conv4 = fpext double %conv1 to x86_fp80
    152   %sub = fsub x86_fp80 %z, %conv4
    153   %mul = fmul x86_fp80 %sub, 0xK40178000000000000000
          ; Step 1: same conversion applied to the scaled remainder; result goes
          ; to tx[1] (8-byte aligned, since it sits one double past the 16-byte
          ; aligned base).
    154   %conv.1 = fptosi x86_fp80 %mul to i32
    155   %conv1.1 = sitofp i32 %conv.1 to double
    156   %arrayidx.1 = getelementptr inbounds [3 x double], [3 x double]* %tx, i64 0, i64 1
    157   store double %conv1.1, double* %arrayidx.1, align 8
          ; Second remainder, scaled by the same constant.
    158   %conv4.1 = fpext double %conv1.1 to x86_fp80
    159   %sub.1 = fsub x86_fp80 %mul, %conv4.1
    160   %mul.1 = fmul x86_fp80 %sub.1, 0xK40178000000000000000
          ; Final step: no integer conversion; the remaining x86_fp80 value is
          ; narrowed directly to double and stored as tx[2].
    161   %conv5 = fptrunc x86_fp80 %mul.1 to double
    162   %arrayidx6 = getelementptr inbounds [3 x double], [3 x double]* %tx, i64 0, i64 2
    163   store double %conv5, double* %arrayidx6, align 16
    164   ret void
    165 }
    166 
        ; Attribute group #0, referenced by the definition of @_Z1fe.
        ; NOTE(review): "no-frame-pointer-elim"="true" is presumably why every
        ; expected prologue in this test sets up %rbp (pushq %rbp / movq %rsp, %rbp)
        ; - confirm before changing it, as the stack offsets in the assertions
        ; depend on the frame layout.
    167 attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    168