Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}v_fsub_f32:
      6 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
      7 define amdgpu_kernel void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) { ; f32 fsub with both operands loaded through %in
      8   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 ; address of the float at index 1 of %in
      9   %a = load float, float addrspace(1)* %in, align 4 ; left operand: float at index 0
     10   %b = load float, float addrspace(1)* %b_ptr, align 4 ; right operand: float at index 1
     11   %result = fsub float %a, %b
     12   store float %result, float addrspace(1)* %out, align 4 ; write the difference through %out
     13   ret void
     14 }
     15 
     16 ; FUNC-LABEL: {{^}}s_fsub_f32:
     17 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
     18 
     19 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     20 define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) { ; f32 fsub whose operands are by-value kernel arguments
     21   %sub = fsub float %a, %b ; operands come straight from the arguments, no loads
     22   store float %sub, float addrspace(1)* %out, align 4 ; write the difference through %out
     23   ret void
     24 }
     25 
     26 ; FUNC-LABEL: {{^}}fsub_v2f32:
     27 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
     28 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
     29 
     30 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     31 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     32 define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { ; <2 x float> fsub on by-value kernel arguments
     33   %sub = fsub <2 x float> %a, %b ; the checks before this define expect one subtract per element
     34   store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8 ; write both lanes through %out
     35   ret void
     36 }
     37 
     38 ; FUNC-LABEL: {{^}}v_fsub_v4f32:
     39 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
     40 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
     41 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
     42 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
     43 
     44 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
     45 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
     46 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
     47 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
     48 define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { ; <4 x float> fsub with both operands loaded through %in
     49   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 ; address of the <4 x float> at index 1 of %in
     50   %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16 ; left operand: vector at index 0
     51   %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16 ; right operand: vector at index 1
     52   %result = fsub <4 x float> %a, %b ; the checks before this define expect four subtracts
     53   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 ; write all four lanes through %out
     54   ret void
     55 }
     56 
     57 ; FUNC-LABEL: {{^}}s_fsub_v4f32:
     58 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     59 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     60 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     61 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
     62 ; SI: s_endpgm
     63 define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { ; <4 x float> fsub on by-value kernel arguments
     64   %result = fsub <4 x float> %a, %b ; operands come straight from the arguments, no loads
     65   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 ; write all four lanes through %out
     66   ret void
     67 }
     68 
     69 ; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
     70 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
     71 ; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
     72 define amdgpu_kernel void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) { ; negated fsub with no nsz flag or attribute
     73   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 ; address of the float at index 1 of %in
     74   %a = load float, float addrspace(1)* %in, align 4 ; left operand: float at index 0
     75   %b = load float, float addrspace(1)* %b_ptr, align 4 ; right operand: float at index 1
     76   %result = fsub float %a, %b ; plain fsub, no fast-math flags
     77   %neg.result = fsub float -0.0, %result ; negation written as -0.0 - x; the checks expect it to remain as a sign-bit xor
     78   store float %neg.result, float addrspace(1)* %out, align 4 ; write the negated difference through %out
     79   ret void
     80 }
     81 
     82 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
     83 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
     84 ; SI-NOT: xor
     85 define amdgpu_kernel void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) { ; negated fsub where the inner fsub carries the nsz flag
     86   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 ; address of the float at index 1 of %in
     87   %a = load float, float addrspace(1)* %in, align 4 ; left operand: float at index 0
     88   %b = load float, float addrspace(1)* %b_ptr, align 4 ; right operand: float at index 1
     89   %result = fsub nsz float %a, %b ; nsz is set on this instruction only
     90   %neg.result = fsub float -0.0, %result ; negation written as -0.0 - x; the checks require that no xor is emitted
     91   store float %neg.result, float addrspace(1)* %out, align 4 ; write the negated difference through %out
     92   ret void
     93 }
     94 
     95 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
     96 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
     97 ; SI-NOT: xor
     98 define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { ; negated fsub; #0 sets "no-signed-zeros-fp-math"="true"
     99   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 ; address of the float at index 1 of %in
    100   %a = load float, float addrspace(1)* %in, align 4 ; left operand: float at index 0
    101   %b = load float, float addrspace(1)* %b_ptr, align 4 ; right operand: float at index 1
    102   %result = fsub float %a, %b ; no per-instruction flag; only the function attribute is in effect
    103   %neg.result = fsub float -0.0, %result ; negation written as -0.0 - x; the checks require that no xor is emitted
    104   store float %neg.result, float addrspace(1)* %out, align 4 ; write the negated difference through %out
    105   ret void
    106 }
    107 
    108 ; The attribute carries a string value ("true" or "false"), so make
    109 ; sure the optimization is disabled and the fneg is not folded when
    110 ; the value is not "true".
    111 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
    112 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
    113 ; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
    114 define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 { ; negated fsub; #1 sets "no-signed-zeros-fp-math"="false"
    115   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 ; address of the float at index 1 of %in
    116   %a = load float, float addrspace(1)* %in, align 4 ; left operand: float at index 0
    117   %b = load float, float addrspace(1)* %b_ptr, align 4 ; right operand: float at index 1
    118   %result = fsub float %a, %b ; no per-instruction flag
    119   %neg.result = fsub float -0.0, %result ; negation written as -0.0 - x; the checks expect it to remain as a sign-bit xor
    120   store float %neg.result, float addrspace(1)* %out, align 4 ; write the negated difference through %out
    121   ret void
    122 }
    123 
    124 ; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
    125 ; SI-NOT: v_sub
    126 define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { ; subtract of constant 0.0; #0 sets "no-signed-zeros-fp-math"="true"
    127   %a = load float, float addrspace(1)* %in, align 4 ; the only loaded operand
    128   %result = fsub float %a, 0.0 ; x - 0.0; the check before this define requires that no v_sub is emitted
    129   store float %result, float addrspace(1)* %out, align 4 ; write the result through %out
    130   ret void
    131 }
    132 
    133 attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } ; applied to @v_fneg_fsub_nsz_attribute_f32 and @v_fsub_0_nsz_attribute_f32
    134 attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" } ; applied to @v_fneg_fsub_nsz_false_attribute_f32
    135