Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      4 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
      5 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s
      6 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
      7 
      8 ; These tests check that fdiv is expanded correctly and also test that the
      9 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
     10 ; instruction groups.
     11 
     12 ; These tests also check fdiv lowering with unsafe_fp_math, coarse fp div, and IEEE754 fp div.
     13 
     14 ; FUNC-LABEL: {{^}}fdiv_f32:
     15 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
     16 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
     17 
     18 ; UNSAFE-FP: v_rcp_f32
     19 ; UNSAFE-FP: v_mul_f32_e32
     20 
     21 ; SI-DAG: v_rcp_f32
     22 ; SI-DAG: v_mul_f32
     23 
     24 ; I754-DAG: v_div_scale_f32
     25 ; I754-DAG: v_rcp_f32
     26 ; I754-DAG: v_fma_f32
     27 ; I754-DAG: v_mul_f32
     28 ; I754-DAG: v_fma_f32
     29 ; I754-DAG: v_div_fixup_f32
     30 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
     31 entry:
     32   %quot = fdiv float %a, %b ; scalar divide with no fast-math flags
     33   store float %quot, float addrspace(1)* %out
     34   ret void
     35 }
     36 
     37 ; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
     38 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
     39 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
     40 
     41 ; UNSAFE-FP: v_rcp_f32
     42 ; UNSAFE-FP: v_mul_f32_e32
     43 
     44 ; SI-DAG: v_rcp_f32
     45 ; SI-DAG: v_mul_f32
     46 define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
     47 entry:
     48   %quot = fdiv fast float %a, %b ; scalar divide carrying the 'fast' flag
     49   store float %quot, float addrspace(1)* %out
     50   ret void
     51 }
     52 
     53 ; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
     54 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
     55 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
     56 
     57 ; UNSAFE-FP: v_rcp_f32
     58 ; UNSAFE-FP: v_mul_f32_e32
     59 
     60 ; SI-DAG: v_rcp_f32
     61 ; SI-DAG: v_mul_f32
     62 define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
     63 entry:
     64   %quot = fdiv arcp float %a, %b ; scalar divide carrying only the 'arcp' flag
     65   store float %quot, float addrspace(1)* %out
     66   ret void
     67 }
     68 
     69 ; FUNC-LABEL: {{^}}fdiv_v2f32:
     70 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
     71 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
     72 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
     73 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
     74 
     75 ; UNSAFE-FP: v_rcp_f32
     76 ; UNSAFE-FP: v_rcp_f32
     77 ; UNSAFE-FP: v_mul_f32_e32
     78 ; UNSAFE-FP: v_mul_f32_e32
     79 
     80 ; SI-DAG: v_rcp_f32
     81 ; SI-DAG: v_mul_f32
     82 ; SI-DAG: v_rcp_f32
     83 ; SI-DAG: v_mul_f32
     84 
     85 ; I754: v_div_scale_f32
     86 ; I754: v_div_scale_f32
     87 ; I754: v_div_scale_f32
     88 ; I754: v_div_scale_f32
     89 ; I754: v_div_fixup_f32
     90 ; I754: v_div_fixup_f32
     91 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
     92 entry:
     93   %quot = fdiv <2 x float> %a, %b ; two-lane divide with no fast-math flags
     94   store <2 x float> %quot, <2 x float> addrspace(1)* %out
     95   ret void
     96 }
     97 
     98 ; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
     99 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
    100 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
    101 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
    102 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
    103 
    104 ; UNSAFE-FP: v_rcp_f32
    105 ; UNSAFE-FP: v_rcp_f32
    106 ; UNSAFE-FP: v_mul_f32_e32
    107 ; UNSAFE-FP: v_mul_f32_e32
    108 
    109 ; SI-DAG: v_rcp_f32
    110 ; SI-DAG: v_mul_f32
    111 ; SI-DAG: v_rcp_f32
    112 ; SI-DAG: v_mul_f32
    113 define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
    114 entry:
    115   %quot = fdiv fast <2 x float> %a, %b ; two-lane divide carrying the 'fast' flag
    116   store <2 x float> %quot, <2 x float> addrspace(1)* %out
    117   ret void
    118 }
    119 
    120 ; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
    121 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
    122 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
    123 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
    124 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
    125 
    126 ; UNSAFE-FP: v_rcp_f32
    127 ; UNSAFE-FP: v_rcp_f32
    128 ; UNSAFE-FP: v_mul_f32_e32
    129 ; UNSAFE-FP: v_mul_f32_e32
    130 
    131 ; SI-DAG: v_rcp_f32
    132 ; SI-DAG: v_mul_f32
    133 ; SI-DAG: v_rcp_f32
    134 ; SI-DAG: v_mul_f32
    135 define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
    136 entry:
    137   %quot = fdiv arcp <2 x float> %a, %b ; two-lane divide carrying only the 'arcp' flag
    138   store <2 x float> %quot, <2 x float> addrspace(1)* %out
    139   ret void
    140 }
    141 
    142 ; FUNC-LABEL: {{^}}fdiv_v4f32:
    143 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    144 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    145 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    146 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    147 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    148 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    149 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    150 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    151 
    152 ; UNSAFE-FP: v_rcp_f32_e32
    153 ; UNSAFE-FP: v_rcp_f32_e32
    154 ; UNSAFE-FP: v_rcp_f32_e32
    155 ; UNSAFE-FP: v_rcp_f32_e32
    156 ; UNSAFE-FP: v_mul_f32_e32
    157 ; UNSAFE-FP: v_mul_f32_e32
    158 ; UNSAFE-FP: v_mul_f32_e32
    159 ; UNSAFE-FP: v_mul_f32_e32
    160 
    161 ; SI-DAG: v_rcp_f32
    162 ; SI-DAG: v_mul_f32
    163 ; SI-DAG: v_rcp_f32
    164 ; SI-DAG: v_mul_f32
    165 ; SI-DAG: v_rcp_f32
    166 ; SI-DAG: v_mul_f32
    167 ; SI-DAG: v_rcp_f32
    168 ; SI-DAG: v_mul_f32
    169 
    170 ; I754: v_div_scale_f32
    171 ; I754: v_div_scale_f32
    172 ; I754: v_div_scale_f32
    173 ; I754: v_div_scale_f32
    174 ; I754: v_div_scale_f32
    175 ; I754: v_div_scale_f32
    176 ; I754: v_div_scale_f32
    177 ; I754: v_div_scale_f32
    178 ; I754: v_div_fixup_f32
    179 ; I754: v_div_fixup_f32
    180 ; I754: v_div_fixup_f32
    181 ; I754: v_div_fixup_f32
    182 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
    183   %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 ; divisor is the vector at index 1 from %in
    184   %num = load <4 x float>, <4 x float> addrspace(1)* %in
    185   %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
    186   %quot = fdiv <4 x float> %num, %den ; four-lane divide with no fast-math flags
    187   store <4 x float> %quot, <4 x float> addrspace(1)* %out
    188   ret void
    189 }
    190 
    191 ; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
    192 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    193 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    194 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    195 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    196 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    197 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    198 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    199 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    200 
    201 ; UNSAFE-FP: v_rcp_f32_e32
    202 ; UNSAFE-FP: v_rcp_f32_e32
    203 ; UNSAFE-FP: v_rcp_f32_e32
    204 ; UNSAFE-FP: v_rcp_f32_e32
    205 ; UNSAFE-FP: v_mul_f32_e32
    206 ; UNSAFE-FP: v_mul_f32_e32
    207 ; UNSAFE-FP: v_mul_f32_e32
    208 ; UNSAFE-FP: v_mul_f32_e32
    209 
    210 ; SI-DAG: v_rcp_f32
    211 ; SI-DAG: v_mul_f32
    212 ; SI-DAG: v_rcp_f32
    213 ; SI-DAG: v_mul_f32
    214 ; SI-DAG: v_rcp_f32
    215 ; SI-DAG: v_mul_f32
    216 ; SI-DAG: v_rcp_f32
    217 ; SI-DAG: v_mul_f32
    218 define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
    219   %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 ; divisor is the vector at index 1 from %in
    220   %num = load <4 x float>, <4 x float> addrspace(1)* %in
    221   %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
    222   %quot = fdiv fast <4 x float> %num, %den ; four-lane divide carrying the 'fast' flag
    223   store <4 x float> %quot, <4 x float> addrspace(1)* %out
    224   ret void
    225 }
    226 
    227 ; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
    228 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    229 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    230 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    231 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
    232 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    233 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    234 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    235 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
    236 
    237 ; UNSAFE-FP: v_rcp_f32_e32
    238 ; UNSAFE-FP: v_rcp_f32_e32
    239 ; UNSAFE-FP: v_rcp_f32_e32
    240 ; UNSAFE-FP: v_rcp_f32_e32
    241 ; UNSAFE-FP: v_mul_f32_e32
    242 ; UNSAFE-FP: v_mul_f32_e32
    243 ; UNSAFE-FP: v_mul_f32_e32
    244 ; UNSAFE-FP: v_mul_f32_e32
    245 
    246 ; SI-DAG: v_rcp_f32
    247 ; SI-DAG: v_mul_f32
    248 ; SI-DAG: v_rcp_f32
    249 ; SI-DAG: v_mul_f32
    250 ; SI-DAG: v_rcp_f32
    251 ; SI-DAG: v_mul_f32
    252 ; SI-DAG: v_rcp_f32
    253 ; SI-DAG: v_mul_f32
    254 define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
    255   %den.ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 ; divisor is the vector at index 1 from %in
    256   %num = load <4 x float>, <4 x float> addrspace(1)* %in
    257   %den = load <4 x float>, <4 x float> addrspace(1)* %den.ptr
    258   %quot = fdiv arcp <4 x float> %num, %den ; four-lane divide carrying only the 'arcp' flag
    259   store <4 x float> %quot, <4 x float> addrspace(1)* %out
    260   ret void
    261 }
    262