; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s

; Code generation test for llvm.floor on double and on vectors of double.
; Three llc configurations are exercised: amdgcn without -mcpu, whose output
; is matched with the SI prefix, and -mcpu=bonaire / -mcpu=tonga, whose output
; is matched with the CI prefix. All three also use the FUNC prefix, which
; anchors the per-function labels.

; Intrinsics called by the kernels below.
declare double @llvm.fabs.f64(double) nounwind readnone
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone

     13 ; FUNC-LABEL: {{^}}ffloor_f64:
     14 ; CI: v_floor_f64_e32
     15 ; SI: v_fract_f64_e32
     16 ; SI-DAG: v_min_f64
     17 ; SI-DAG: v_cmp_class_f64_e64 vcc
     18 ; SI: v_cndmask_b32_e32
     19 ; SI: v_cndmask_b32_e32
     20 ; SI: v_add_f64
     21 ; SI: s_endpgm
     22 define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
     23   %y = call double @llvm.floor.f64(double %x) nounwind readnone
     24   store double %y, double addrspace(1)* %out
     25   ret void
     26 }
     27 
     28 ; FUNC-LABEL: {{^}}ffloor_f64_neg:
     29 ; CI: v_floor_f64_e64
     30 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
     31 ; SI-DAG: v_min_f64
     32 ; SI-DAG: v_cmp_class_f64_e64 vcc
     33 ; SI: v_cndmask_b32_e32
     34 ; SI: v_cndmask_b32_e32
     35 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
     36 ; SI: s_endpgm
     37 define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
     38   %neg = fsub double 0.0, %x
     39   %y = call double @llvm.floor.f64(double %neg) nounwind readnone
     40   store double %y, double addrspace(1)* %out
     41   ret void
     42 }
     43 
     44 ; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
     45 ; CI: v_floor_f64_e64
     46 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
     47 ; SI-DAG: v_min_f64
     48 ; SI-DAG: v_cmp_class_f64_e64 vcc
     49 ; SI: v_cndmask_b32_e32
     50 ; SI: v_cndmask_b32_e32
     51 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
     52 ; SI: s_endpgm
     53 define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
     54   %abs = call double @llvm.fabs.f64(double %x)
     55   %neg = fsub double 0.0, %abs
     56   %y = call double @llvm.floor.f64(double %neg) nounwind readnone
     57   store double %y, double addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; FUNC-LABEL: {{^}}ffloor_v2f64:
     62 ; CI: v_floor_f64_e32
     63 ; CI: v_floor_f64_e32
     64 define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
     65   %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
     66   store <2 x double> %y, <2 x double> addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; FUNC-LABEL: {{^}}ffloor_v3f64:
     71 ; CI: v_floor_f64_e32
     72 ; CI: v_floor_f64_e32
     73 ; CI: v_floor_f64_e32
     74 ; CI-NOT: v_floor_f64_e32
     75 define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
     76   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
     77   store <3 x double> %y, <3 x double> addrspace(1)* %out
     78   ret void
     79 }
     80 
     81 ; FUNC-LABEL: {{^}}ffloor_v4f64:
     82 ; CI: v_floor_f64_e32
     83 ; CI: v_floor_f64_e32
     84 ; CI: v_floor_f64_e32
     85 ; CI: v_floor_f64_e32
     86 define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
     87   %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
     88   store <4 x double> %y, <4 x double> addrspace(1)* %out
     89   ret void
     90 }
     91 
     92 ; FUNC-LABEL: {{^}}ffloor_v8f64:
     93 ; CI: v_floor_f64_e32
     94 ; CI: v_floor_f64_e32
     95 ; CI: v_floor_f64_e32
     96 ; CI: v_floor_f64_e32
     97 ; CI: v_floor_f64_e32
     98 ; CI: v_floor_f64_e32
     99 ; CI: v_floor_f64_e32
    100 ; CI: v_floor_f64_e32
    101 define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
    102   %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
    103   store <8 x double> %y, <8 x double> addrspace(1)* %out
    104   ret void
    105 }
    106 
    107 ; FUNC-LABEL: {{^}}ffloor_v16f64:
    108 ; CI: v_floor_f64_e32
    109 ; CI: v_floor_f64_e32
    110 ; CI: v_floor_f64_e32
    111 ; CI: v_floor_f64_e32
    112 ; CI: v_floor_f64_e32
    113 ; CI: v_floor_f64_e32
    114 ; CI: v_floor_f64_e32
    115 ; CI: v_floor_f64_e32
    116 ; CI: v_floor_f64_e32
    117 ; CI: v_floor_f64_e32
    118 ; CI: v_floor_f64_e32
    119 ; CI: v_floor_f64_e32
    120 ; CI: v_floor_f64_e32
    121 ; CI: v_floor_f64_e32
    122 ; CI: v_floor_f64_e32
    123 ; CI: v_floor_f64_e32
    124 define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
    125   %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
    126   store <16 x double> %y, <16 x double> addrspace(1)* %out
    127   ret void
    128 }
    129