; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

; Lowering of llvm.floor on f64 scalars and vectors for amdgcn.
;
; The two invocations that share the CI prefix (bonaire and tonga) expect one
; v_floor_f64 instruction per element. The SI invocation expects the scalar
; cases to be expanded into a v_fract_f64 / v_min_f64 / v_cmp_class_f64 /
; v_cndmask_b32 (twice) / v_add_f64 sequence; the vector cases carry no
; SI-specific expectations in this file.

; Floor intrinsics under test, one per vector width.
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone

; Helper intrinsic used to build the -|x| operand below.
declare double @llvm.fabs.f64(double %Val)

; Scalar floor of an unmodified kernel argument.
; FUNC-LABEL: {{^}}ffloor_f64:
; CI: v_floor_f64_e32
; SI: v_fract_f64_e32
; SI: v_min_f64
; SI: v_cmp_class_f64_e64
; SI: v_cndmask_b32_e64
; SI: v_cndmask_b32_e64
; SI: v_add_f64
; SI: s_endpgm
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
  %floor.x = call double @llvm.floor.f64(double %x) nounwind readnone
  store double %floor.x, double addrspace(1)* %out
  ret void
}

; Scalar floor of a negated argument. The patterns expect the negation to show
; up as a "-" modifier on the captured scalar register pair, and the same
; negated register to be reused by the final v_add_f64.
; FUNC-LABEL: {{^}}ffloor_f64_neg:
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
; SI: v_min_f64
; SI: v_cmp_class_f64_e64
; SI: v_cndmask_b32_e64
; SI: v_cndmask_b32_e64
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
; SI: s_endpgm
define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
  %neg.x = fsub double 0.0, %x
  %floor.neg = call double @llvm.floor.f64(double %neg.x) nounwind readnone
  store double %floor.neg, double addrspace(1)* %out
  ret void
}

; Scalar floor of -|x|. Same shape as the negated case, but the patterns expect
; both the "-" and the "|...|" modifiers on the captured register pair.
; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
; SI: v_min_f64
; SI: v_cmp_class_f64_e64
; SI: v_cndmask_b32_e64
; SI: v_cndmask_b32_e64
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
; SI: s_endpgm
define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
  %abs.x = call double @llvm.fabs.f64(double %x)
  %neg.abs.x = fsub double 0.0, %abs.x
  %floor.neg.abs = call double @llvm.floor.f64(double %neg.abs.x) nounwind readnone
  store double %floor.neg.abs, double addrspace(1)* %out
  ret void
}

; Two-element vector: one v_floor_f64 expected per element.
; FUNC-LABEL: {{^}}ffloor_v2f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
  %floor.vec = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
  store <2 x double> %floor.vec, <2 x double> addrspace(1)* %out
  ret void
}

; Three-element vector: currently disabled. The directives carry a FIXME-
; prefix so FileCheck does not pick them up, and the function itself is
; commented out. The reason is not recorded in this file.
; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
; FIXME-CI: v_floor_f64_e32
; FIXME-CI: v_floor_f64_e32
; FIXME-CI: v_floor_f64_e32
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
;   store <3 x double> %y, <3 x double> addrspace(1)* %out
;   ret void
; }

; Four-element vector: four v_floor_f64 instructions expected.
; FUNC-LABEL: {{^}}ffloor_v4f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
  %floor.vec = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
  store <4 x double> %floor.vec, <4 x double> addrspace(1)* %out
  ret void
}

; Eight-element vector: eight v_floor_f64 instructions expected.
; FUNC-LABEL: {{^}}ffloor_v8f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
  %floor.vec = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
  store <8 x double> %floor.vec, <8 x double> addrspace(1)* %out
  ret void
}

; Sixteen-element vector: sixteen v_floor_f64 instructions expected.
; FUNC-LABEL: {{^}}ffloor_v16f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
  %floor.vec = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
  store <16 x double> %floor.vec, <16 x double> addrspace(1)* %out
  ret void
}