; Codegen test for llvm.floor on double and on vectors of double (amdgcn).
;
; Three llc invocations are checked, all with -enable-unsafe-fp-math:
;   * default amdgcn subtarget               -> checks under the SI prefix
;   * -mcpu=bonaire                          -> checks under the CI prefix
;   * -mcpu=tonga with flat-for-global off   -> also the CI prefix
; Checks under the FUNC prefix are shared by every invocation.

; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

declare double @llvm.fabs.f64(double %Val)
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone

; Scalar floor of a kernel argument.
; bonaire/tonga are expected to emit a single v_floor_f64; the default
; subtarget is expected to emit the fract/min/class/cndmask/add sequence.

; FUNC-LABEL: {{^}}ffloor_f64:
; CI: v_floor_f64_e32
; SI: v_fract_f64_e32
; SI-DAG: v_min_f64
; SI-DAG: v_cmp_class_f64_e64 vcc
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_add_f64
; SI: s_endpgm
define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
  %y = call double @llvm.floor.f64(double %x) nounwind readnone
  store double %y, double addrspace(1)* %out
  ret void
}

; floor(0.0 - x): the checks expect the negation to appear as a '-' source
; modifier on the scalar register pair captured as INPUT, both on the fract
; and on the final add. Brackets in the register-range patterns are escaped
; so that they match a literal "[lo:hi]" rather than forming a character class.

; FUNC-LABEL: {{^}}ffloor_f64_neg:
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, -[[INPUT:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: v_min_f64
; SI-DAG: v_cmp_class_f64_e64 vcc
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[INPUT]]
; SI: s_endpgm
define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
  %neg = fsub double 0.0, %x
  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
  store double %y, double addrspace(1)* %out
  ret void
}

; floor(0.0 - fabs(x)): same as above, but the checks expect both the abs and
; the neg to appear as source modifiers ("-|reg|") on the captured input.

; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, -|[[INPUT:s\[[0-9]+:[0-9]+\]]]|
; SI-DAG: v_min_f64
; SI-DAG: v_cmp_class_f64_e64 vcc
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|[[INPUT]]|
; SI: s_endpgm
define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
  %abs = call double @llvm.fabs.f64(double %x)
  %neg = fsub double 0.0, %abs
  %y = call double @llvm.floor.f64(double %neg) nounwind readnone
  store double %y, double addrspace(1)* %out
  ret void
}

; Vector cases below: only the bonaire/tonga output is checked, expecting one
; v_floor_f64_e32 per vector element.

; FUNC-LABEL: {{^}}ffloor_v2f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
  %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
  store <2 x double> %y, <2 x double> addrspace(1)* %out
  ret void
}

; Three elements: the trailing negative check rejects a fourth floor after
; the three expected ones.

; FUNC-LABEL: {{^}}ffloor_v3f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI-NOT: v_floor_f64_e32
define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
  %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
  store <3 x double> %y, <3 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ffloor_v4f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
  %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
  store <4 x double> %y, <4 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ffloor_v8f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
  %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
  store <8 x double> %y, <8 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ffloor_v16f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
  %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
  store <16 x double> %y, <16 x double> addrspace(1)* %out
  ret void
}