; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=prescott | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -mcpu=sandybridge | FileCheck %s --check-prefix=AVX

; @_Z1fe takes one x86_fp80 argument and fills a local [3 x double] buffer:
;   slot 0 = the argument truncated to i32, converted back to double
;   slot 1 = (argument - slot 0) * 2^24, truncated to i32, converted to double
;   slot 2 = ((argument - slot 0) * 2^24 - slot 1) * 2^24, narrowed to double
; The function returns void. The assertions below pin the x87/SSE/AVX
; instruction sequence emitted for each llc invocation listed above.
define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
; SSE2-LABEL: _Z1fe:
; SSE2: ## %bb.0: ## %entry
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: .cfi_def_cfa_offset 16
; SSE2-NEXT: .cfi_offset %rbp, -16
; SSE2-NEXT: movq %rsp, %rbp
; SSE2-NEXT: .cfi_def_cfa_register %rbp
; SSE2-NEXT: fldt 16(%rbp)
; SSE2-NEXT: fnstcw -4(%rbp)
; SSE2-NEXT: movzwl -4(%rbp), %eax
; SSE2-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F
; SSE2-NEXT: fldcw -4(%rbp)
; SSE2-NEXT: movw %ax, -4(%rbp)
; SSE2-NEXT: fistl -8(%rbp)
; SSE2-NEXT: fldcw -4(%rbp)
; SSE2-NEXT: cvtsi2sdl -8(%rbp), %xmm0
; SSE2-NEXT: movsd %xmm0, -64(%rbp)
; SSE2-NEXT: movsd %xmm0, -32(%rbp)
; SSE2-NEXT: fsubl -32(%rbp)
; SSE2-NEXT: flds {{.*}}(%rip)
; SSE2-NEXT: fmul %st(0), %st(1)
; SSE2-NEXT: fnstcw -2(%rbp)
; SSE2-NEXT: movzwl -2(%rbp), %eax
; SSE2-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F
; SSE2-NEXT: fldcw -2(%rbp)
; SSE2-NEXT: movw %ax, -2(%rbp)
; SSE2-NEXT: fxch %st(1)
; SSE2-NEXT: fistl -12(%rbp)
; SSE2-NEXT: fldcw -2(%rbp)
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sdl -12(%rbp), %xmm0
; SSE2-NEXT: movsd %xmm0, -56(%rbp)
; SSE2-NEXT: movsd %xmm0, -24(%rbp)
; SSE2-NEXT: fsubl -24(%rbp)
; SSE2-NEXT: fmulp %st(1)
; SSE2-NEXT: fstpl -48(%rbp)
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: retq
;
; SSE2-SCHEDULE-LABEL: _Z1fe:
; SSE2-SCHEDULE: ## %bb.0: ## %entry
; SSE2-SCHEDULE-NEXT: pushq %rbp
; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_offset 16
; SSE2-SCHEDULE-NEXT: .cfi_offset %rbp, -16
; SSE2-SCHEDULE-NEXT: movq %rsp, %rbp
; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_register %rbp
; SSE2-SCHEDULE-NEXT: fnstcw -4(%rbp)
; SSE2-SCHEDULE-NEXT: movzwl -4(%rbp), %eax
; SSE2-SCHEDULE-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
; SSE2-SCHEDULE-NEXT: fldt 16(%rbp)
; SSE2-SCHEDULE-NEXT: movw %ax, -4(%rbp)
; SSE2-SCHEDULE-NEXT: fistl -8(%rbp)
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
; SSE2-SCHEDULE-NEXT: cvtsi2sdl -8(%rbp), %xmm0
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -64(%rbp)
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -32(%rbp)
; SSE2-SCHEDULE-NEXT: fsubl -32(%rbp)
; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp)
; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip)
; SSE2-SCHEDULE-NEXT: movzwl -2(%rbp), %eax
; SSE2-SCHEDULE-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F
; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp)
; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1)
; SSE2-SCHEDULE-NEXT: movw %ax, -2(%rbp)
; SSE2-SCHEDULE-NEXT: fxch %st(1)
; SSE2-SCHEDULE-NEXT: fistl -12(%rbp)
; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp)
; SSE2-SCHEDULE-NEXT: xorps %xmm0, %xmm0
; SSE2-SCHEDULE-NEXT: cvtsi2sdl -12(%rbp), %xmm0
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -56(%rbp)
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -24(%rbp)
; SSE2-SCHEDULE-NEXT: fsubl -24(%rbp)
; SSE2-SCHEDULE-NEXT: fmulp %st(1)
; SSE2-SCHEDULE-NEXT: fstpl -48(%rbp)
; SSE2-SCHEDULE-NEXT: popq %rbp
; SSE2-SCHEDULE-NEXT: retq
;
; SSE3-LABEL: _Z1fe:
; SSE3: ## %bb.0: ## %entry
; SSE3-NEXT: pushq %rbp
; SSE3-NEXT: .cfi_def_cfa_offset 16
; SSE3-NEXT: .cfi_offset %rbp, -16
; SSE3-NEXT: movq %rsp, %rbp
; SSE3-NEXT: .cfi_def_cfa_register %rbp
; SSE3-NEXT: fldt 16(%rbp)
; SSE3-NEXT: fld %st(0)
; SSE3-NEXT: fisttpl -4(%rbp)
; SSE3-NEXT: cvtsi2sdl -4(%rbp), %xmm0
; SSE3-NEXT: movsd %xmm0, -48(%rbp)
; SSE3-NEXT: movsd %xmm0, -24(%rbp)
; SSE3-NEXT: fsubl -24(%rbp)
; SSE3-NEXT: flds {{.*}}(%rip)
; SSE3-NEXT: fmul %st(0), %st(1)
; SSE3-NEXT: fld %st(1)
; SSE3-NEXT: fisttpl -8(%rbp)
; SSE3-NEXT: xorps %xmm0, %xmm0
; SSE3-NEXT: cvtsi2sdl -8(%rbp), %xmm0
; SSE3-NEXT: movsd %xmm0, -40(%rbp)
; SSE3-NEXT: movsd %xmm0, -16(%rbp)
; SSE3-NEXT: fxch %st(1)
; SSE3-NEXT: fsubl -16(%rbp)
; SSE3-NEXT: fmulp %st(1)
; SSE3-NEXT: fstpl -32(%rbp)
; SSE3-NEXT: popq %rbp
; SSE3-NEXT: retq
;
; AVX-LABEL: _Z1fe:
; AVX: ## %bb.0: ## %entry
; AVX-NEXT: pushq %rbp
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: .cfi_offset %rbp, -16
; AVX-NEXT: movq %rsp, %rbp
; AVX-NEXT: .cfi_def_cfa_register %rbp
; AVX-NEXT: fldt 16(%rbp)
; AVX-NEXT: fld %st(0)
; AVX-NEXT: fisttpl -4(%rbp)
; AVX-NEXT: vcvtsi2sdl -4(%rbp), %xmm0, %xmm0
; AVX-NEXT: vmovsd %xmm0, -48(%rbp)
; AVX-NEXT: vmovsd %xmm0, -24(%rbp)
; AVX-NEXT: fsubl -24(%rbp)
; AVX-NEXT: flds {{.*}}(%rip)
; AVX-NEXT: fmul %st(0), %st(1)
; AVX-NEXT: fld %st(1)
; AVX-NEXT: fisttpl -8(%rbp)
; AVX-NEXT: vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
; AVX-NEXT: vmovsd %xmm0, -40(%rbp)
; AVX-NEXT: vmovsd %xmm0, -16(%rbp)
; AVX-NEXT: fxch %st(1)
; AVX-NEXT: fsubl -16(%rbp)
; AVX-NEXT: fmulp %st(1)
; AVX-NEXT: fstpl -32(%rbp)
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
entry:
  ; Local three-element output buffer; the i8* view of it is never used.
  %buf = alloca [3 x double], align 16
  %buf.i8 = bitcast [3 x double]* %buf to i8*

  ; Slot 0: the argument converted to a signed i32, then back to double.
  %int0 = fptosi x86_fp80 %z to i32
  %dbl0 = sitofp i32 %int0 to double
  %slot0 = getelementptr inbounds [3 x double], [3 x double]* %buf, i64 0, i64 0
  store double %dbl0, double* %slot0, align 16

  ; Subtract what slot 0 captured and scale the remainder.
  ; 0xK40178000000000000000 is the x86_fp80 encoding of 2^24 (16777216.0).
  %ext0 = fpext double %dbl0 to x86_fp80
  %rem0 = fsub x86_fp80 %z, %ext0
  %scaled0 = fmul x86_fp80 %rem0, 0xK40178000000000000000

  ; Slot 1: same integer round trip, applied to the scaled remainder.
  %int1 = fptosi x86_fp80 %scaled0 to i32
  %dbl1 = sitofp i32 %int1 to double
  %slot1 = getelementptr inbounds [3 x double], [3 x double]* %buf, i64 0, i64 1
  store double %dbl1, double* %slot1, align 8

  ; Subtract what slot 1 captured and scale by 2^24 once more.
  %ext1 = fpext double %dbl1 to x86_fp80
  %rem1 = fsub x86_fp80 %scaled0, %ext1
  %scaled1 = fmul x86_fp80 %rem1, 0xK40178000000000000000

  ; Slot 2: the final scaled remainder, narrowed directly to double.
  %tail = fptrunc x86_fp80 %scaled1 to double
  %slot2 = getelementptr inbounds [3 x double], [3 x double]* %buf, i64 0, i64 2
  store double %tail, double* %slot2, align 16
  ret void
}

attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }