Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
      4 
      5 declare double @llvm.fabs.f64(double %Val)
        ; llvm.fabs.f64 above is called by ffloor_f64_neg_abs. The llvm.floor
        ; declarations below cover the scalar double overload and the 2-, 3-, 4-,
        ; 8- and 16-element double vector overloads. In the code visible here the
        ; v3f64 overload is referenced only from the commented-out ffloor_v3f64.
      6 declare double @llvm.floor.f64(double) nounwind readnone
      7 declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
      8 declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
      9 declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
     10 declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
     11 declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
     12 
     13 ; FUNC-LABEL: {{^}}ffloor_f64:
        ; Scalar case. The function calls llvm.floor.f64 on the argument %x and
        ; stores the result through %out. The check under the CI prefix (used by
        ; the bonaire and tonga invocations) expects v_floor_f64_e32. The checks
        ; under the SI prefix expect, in this order, v_fract_f64_e32, v_min_f64,
        ; v_cmp_class_f64_e64, two v_cndmask_b32_e64, v_add_f64 and s_endpgm.
     14 ; CI: v_floor_f64_e32
     15 ; SI: v_fract_f64_e32
     16 ; SI: v_min_f64
     17 ; SI: v_cmp_class_f64_e64
     18 ; SI: v_cndmask_b32_e64
     19 ; SI: v_cndmask_b32_e64
     20 ; SI: v_add_f64
     21 ; SI: s_endpgm
     22 define void @ffloor_f64(double addrspace(1)* %out, double %x) {
     23   %y = call double @llvm.floor.f64(double %x) nounwind readnone
     24   store double %y, double addrspace(1)* %out
     25   ret void
     26 }
     27 
     28 ; FUNC-LABEL: {{^}}ffloor_f64_neg:
        ; Floor of a negated input. The IR computes 0.0 - %x and passes that to
        ; llvm.floor.f64. The check under the CI prefix expects the e64 form,
        ; v_floor_f64_e64. The checks under the SI prefix capture the s[N:M]
        ; operand of v_fract_f64_e64 as INPUT, written with a leading '-', and
        ; require the same captured operand, again with a leading '-', on the
        ; later v_add_f64.
     29 ; CI: v_floor_f64_e64
     30 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
     31 ; SI: v_min_f64
     32 ; SI: v_cmp_class_f64_e64
     33 ; SI: v_cndmask_b32_e64
     34 ; SI: v_cndmask_b32_e64
     35 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
     36 ; SI: s_endpgm
     37 define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
     38   %neg = fsub double 0.0, %x
     39   %y = call double @llvm.floor.f64(double %neg) nounwind readnone
     40   store double %y, double addrspace(1)* %out
     41   ret void
     42 }
     43 
     44 ; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
        ; Floor of a negated absolute value. The IR computes 0.0 - fabs(%x) and
        ; passes that to llvm.floor.f64. The check under the CI prefix expects
        ; v_floor_f64_e64. The checks under the SI prefix capture the s[N:M]
        ; operand of v_fract_f64_e64 as INPUT, written as -|operand|, and require
        ; the same captured operand in the same -|operand| form on the later
        ; v_add_f64.
     45 ; CI: v_floor_f64_e64
     46 ; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
     47 ; SI: v_min_f64
     48 ; SI: v_cmp_class_f64_e64
     49 ; SI: v_cndmask_b32_e64
     50 ; SI: v_cndmask_b32_e64
     51 ; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
     52 ; SI: s_endpgm
     53 define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
     54   %abs = call double @llvm.fabs.f64(double %x)
     55   %neg = fsub double 0.0, %abs
     56   %y = call double @llvm.floor.f64(double %neg) nounwind readnone
     57   store double %y, double addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; FUNC-LABEL: {{^}}ffloor_v2f64:
        ; Two-element vector case. The function calls llvm.floor.v2f64 on %x and
        ; stores the result through %out. The checks under the CI prefix expect
        ; two v_floor_f64_e32 instructions, one per element. This function has no
        ; checks under the SI prefix.
     62 ; CI: v_floor_f64_e32
     63 ; CI: v_floor_f64_e32
     64 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
     65   %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
     66   store <2 x double> %y, <2 x double> addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
        ; Disabled three-element vector case. The function body below is commented
        ; out, and its directives use FIXME-prefixed names that none of the
        ; FileCheck invocations at the top of this file pass as a check prefix.
        ; Once enabled, the intent is three v_floor_f64_e32 instructions.
     71 ; FIXME-CI: v_floor_f64_e32
     72 ; FIXME-CI: v_floor_f64_e32
     73 ; FIXME-CI: v_floor_f64_e32
     74 ; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
     75 ;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
     76 ;   store <3 x double> %y, <3 x double> addrspace(1)* %out
     77 ;   ret void
     78 ; }
     79 
     80 ; FUNC-LABEL: {{^}}ffloor_v4f64:
        ; Four-element vector case. The function calls llvm.floor.v4f64 on %x and
        ; stores the result through %out. The checks under the CI prefix expect
        ; four v_floor_f64_e32 instructions, one per element. This function has no
        ; checks under the SI prefix.
     81 ; CI: v_floor_f64_e32
     82 ; CI: v_floor_f64_e32
     83 ; CI: v_floor_f64_e32
     84 ; CI: v_floor_f64_e32
     85 define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
     86   %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
     87   store <4 x double> %y, <4 x double> addrspace(1)* %out
     88   ret void
     89 }
     90 
     91 ; FUNC-LABEL: {{^}}ffloor_v8f64:
        ; Eight-element vector case. The function calls llvm.floor.v8f64 on %x and
        ; stores the result through %out. The checks under the CI prefix expect
        ; eight v_floor_f64_e32 instructions, one per element. This function has
        ; no checks under the SI prefix.
     92 ; CI: v_floor_f64_e32
     93 ; CI: v_floor_f64_e32
     94 ; CI: v_floor_f64_e32
     95 ; CI: v_floor_f64_e32
     96 ; CI: v_floor_f64_e32
     97 ; CI: v_floor_f64_e32
     98 ; CI: v_floor_f64_e32
     99 ; CI: v_floor_f64_e32
    100 define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
    101   %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
    102   store <8 x double> %y, <8 x double> addrspace(1)* %out
    103   ret void
    104 }
    105 
    106 ; FUNC-LABEL: {{^}}ffloor_v16f64:
        ; Sixteen-element vector case. The function calls llvm.floor.v16f64 on %x
        ; and stores the result through %out. The checks under the CI prefix
        ; expect sixteen v_floor_f64_e32 instructions, one per element. This
        ; function has no checks under the SI prefix.
    107 ; CI: v_floor_f64_e32
    108 ; CI: v_floor_f64_e32
    109 ; CI: v_floor_f64_e32
    110 ; CI: v_floor_f64_e32
    111 ; CI: v_floor_f64_e32
    112 ; CI: v_floor_f64_e32
    113 ; CI: v_floor_f64_e32
    114 ; CI: v_floor_f64_e32
    115 ; CI: v_floor_f64_e32
    116 ; CI: v_floor_f64_e32
    117 ; CI: v_floor_f64_e32
    118 ; CI: v_floor_f64_e32
    119 ; CI: v_floor_f64_e32
    120 ; CI: v_floor_f64_e32
    121 ; CI: v_floor_f64_e32
    122 ; CI: v_floor_f64_e32
    123 define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
    124   %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
    125   store <16 x double> %y, <16 x double> addrspace(1)* %out
    126   ret void
    127 }
    128