Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
        ; Both llc invocations above (tahiti and tonga) are verified with the same
        ; check prefix, so every expectation in this file has to hold for both CPUs.
      3 
        ; Export intrinsic taking five i32 operands followed by four float operands.
        ; It is called ahead of the return in vgpr, vgpr_literal, both and
        ; structure_literal.
      4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
      5 
      6 ; GCN-LABEL: {{^}}vgpr:
        ; Vertex shader that exports its float argument and then returns it twice:
        ; element 0 is the argument plus 1.0, element 1 is the argument unchanged.
        ; The expectations require the input to be copied from v0 to v1 first, the
        ; add (into v0) and the export (from v1) in either order, then an export
        ; wait, and no s_endpgm for the rest of the function.
      7 ; GCN: v_mov_b32_e32 v1, v0
      8 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
      9 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
     10 ; GCN: s_waitcnt expcnt(0)
     11 ; GCN-NOT: s_endpgm
     12 define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float %in) {
     13   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %in, float %in, float %in, float %in)
     14   %plus_one = fadd float %in, 1.0
     15   %elt0 = insertvalue {float, float} undef, float %plus_one, 0
     16   %both_elts = insertvalue {float, float} %elt0, float %in, 1
     17   ret {float, float} %both_elts
     18 }
     19 
     20 ; GCN-LABEL: {{^}}vgpr_literal:
        ; Vertex shader that exports its float argument and returns a constant
        ; four-float struct. The expectations are ordered so that the export (which
        ; reads v0) and its wait come first; the four constant moves into v0-v3
        ; follow, in any order among themselves. No s_endpgm may appear afterwards.
     21 ; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
     22 ; GCN: s_waitcnt expcnt(0)
     23 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
     24 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
     25 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
     26 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
     27 ; GCN-NOT: s_endpgm
     28 define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
     29   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
     30   ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
     31 }
     32 
     33 
     34 ; GCN: .long 165580
        ; Two (register, value) pairs are expected ahead of the function label.
        ; 562 = 0x232 has bits 1, 4, 5 and 9 set, which lines up with the inputs the
        ; body actually reads (%4, %7, %8 and %12) when %3 is counted as bit 0.
        ; With "InitialPSInputAddr"="0" the second value is the same 562.
        ; NOTE(review): 165580/165584 (0x286CC/0x286D0) look like the
        ; SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR register offsets -- confirm.
     35 ; GCN-NEXT: .long 562
     36 ; GCN-NEXT: .long 165584
     37 ; GCN-NEXT: .long 562
     38 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
        ; Return elements 0-2 must need no copy into v0-v2; elements 3 and 4 are
        ; expected to be copied from v4 and v6. No s_endpgm may follow.
     39 ; GCN-NOT: v_mov_b32_e32 v0
     40 ; GCN-NOT: v_mov_b32_e32 v1
     41 ; GCN-NOT: v_mov_b32_e32 v2
     42 ; GCN: v_mov_b32_e32 v3, v4
     43 ; GCN: v_mov_b32_e32 v4, v6
     44 ; GCN-NOT: s_endpgm
        ; Attribute group #0 is also used by ps_input_ena_no_inputs and
        ; ps_input_ena_pos_w.
     45 attributes #0 = { "InitialPSInputAddr"="0" }
     46 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
        ; Pick both lanes of %4 and lane 0 of %7 and %8.
     47   %i0 = extractelement <2 x i32> %4, i32 0
     48   %i1 = extractelement <2 x i32> %4, i32 1
     49   %i2 = extractelement <2 x i32> %7, i32 0
     50   %i3 = extractelement <2 x i32> %8, i32 0
        ; Reinterpret the integers as floats so they can go into the float struct.
     51   %f0 = bitcast i32 %i0 to float
     52   %f1 = bitcast i32 %i1 to float
     53   %f2 = bitcast i32 %i2 to float
     54   %f3 = bitcast i32 %i3 to float
        ; Build the five-element return value; element 4 is the float argument %12.
     55   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
     56   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
     57   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
     58   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
     59   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
     60   ret {float, float, float, float, float} %r4
     61 }
     62 
     63 
     64 ; GCN: .long 165580
        ; The body reads none of its arguments, yet both expected values are 1
        ; (bit 0 only) rather than 0.
        ; NOTE(review): presumably the backend forces at least one PS input to be
        ; enabled -- confirm against the AMDGPU backend sources.
     65 ; GCN-NEXT: .long 1
     66 ; GCN-NEXT: .long 165584
     67 ; GCN-NEXT: .long 1
     68 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
        ; The constant return value is expected in v0, with no s_endpgm after it.
     69 ; GCN: v_mov_b32_e32 v0, 1.0
     70 ; GCN-NOT: s_endpgm
     71 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
     72   ret float 1.0
     73 }
     74 
     75 
     76 ; GCN: .long 165580
        ; 2081 = 0x821 has bits 0, 5 and 11 set. Bits 5 and 11 line up with the two
        ; inputs the body reads (%8 and %14) when %3 is counted as bit 0; bit 0 is
        ; expected as well although %3 itself is unused.
        ; NOTE(review): presumably bit 0 is forced on by the backend -- confirm.
     77 ; GCN-NEXT: .long 2081
     78 ; GCN-NEXT: .long 165584
     79 ; GCN-NEXT: .long 2081
     80 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
        ; The scalar float is returned in v0 and the two vector lanes in v1-v2.
     81 ; GCN-DAG: v_mov_b32_e32 v0, v4
     82 ; GCN-DAG: v_mov_b32_e32 v1, v2
     83 ; GCN: v_mov_b32_e32 v2, v3
     84 ; GCN-NOT: s_endpgm
     85 define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
        ; Reinterpret the integer pair %8 as a float pair for the second element.
     86   %vec = bitcast <2 x i32> %8 to <2 x float>
        ; Element 0 is the float argument %14; element 1 is the converted vector.
     87   %agg0 = insertvalue {float, <2 x float>} undef, float %14, 0
     88   %agg1 = insertvalue {float, <2 x float>} %agg0, <2 x float> %vec, 1
     89   ret {float, <2 x float>} %agg1
     90 }
     91 
     92 
     93 ; GCN: .long 165580
        ; The first value, 562 = 0x232 (bits 1, 4, 5, 9), lines up with the inputs
        ; the body reads (%4, %7, %8, %12) when %3 is counted as bit 0. The second
        ; value is 563 = 562 | 1, i.e. the same mask with the "InitialPSInputAddr"
        ; value of 1 OR-ed in.
     94 ; GCN-NEXT: .long 562
     95 ; GCN-NEXT: .long 165584
     96 ; GCN-NEXT: .long 563
     97 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
        ; All five return elements are expected to be copied down, from v2, v3, v4,
        ; v6 and v8. That is consistent with the unused <2 x i32> %3 occupying v0-v1
        ; because of the extra address bit (presumably; confirm).
     98 ; GCN-DAG: v_mov_b32_e32 v0, v2
     99 ; GCN-DAG: v_mov_b32_e32 v1, v3
    100 ; GCN: v_mov_b32_e32 v2, v4
    101 ; GCN-DAG: v_mov_b32_e32 v3, v6
    102 ; GCN-DAG: v_mov_b32_e32 v4, v8
    103 ; GCN-NOT: s_endpgm
    104 attributes #1 = { "InitialPSInputAddr"="1" }
    105 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
        ; Pick both lanes of %4 and lane 0 of %7 and %8.
    106   %i0 = extractelement <2 x i32> %4, i32 0
    107   %i1 = extractelement <2 x i32> %4, i32 1
    108   %i2 = extractelement <2 x i32> %7, i32 0
    109   %i3 = extractelement <2 x i32> %8, i32 0
        ; Reinterpret the integers as floats so they can go into the float struct.
    110   %f0 = bitcast i32 %i0 to float
    111   %f1 = bitcast i32 %i1 to float
    112   %f2 = bitcast i32 %i2 to float
    113   %f3 = bitcast i32 %i3 to float
        ; Build the five-element return value; element 4 is the float argument %12.
    114   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
    115   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
    116   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
    117   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
    118   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
    119   ret {float, float, float, float, float} %r4
    120 }
    121 
    122 
    123 ; GCN: .long 165580
        ; The first value, 562 = 0x232 (bits 1, 4, 5, 9), lines up with the inputs
        ; the body reads (%4, %7, %8, %12) when %3 is counted as bit 0. The second
        ; value is 631 = 562 | 119, i.e. the same mask with the "InitialPSInputAddr"
        ; value of 119 (0x77, bits 0, 1, 2, 4, 5, 6) OR-ed in.
    124 ; GCN-NEXT: .long 562
    125 ; GCN-NEXT: .long 165584
    126 ; GCN-NEXT: .long 631
    127 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
        ; All five return elements are expected to be copied down, from v2, v3, v6,
        ; v8 and v12. That is consistent with every input named by the 631 mask
        ; occupying VGPRs whether or not the body reads it (presumably; confirm).
    128 ; GCN-DAG: v_mov_b32_e32 v0, v2
    129 ; GCN-DAG: v_mov_b32_e32 v1, v3
    130 ; GCN: v_mov_b32_e32 v2, v6
    131 ; GCN: v_mov_b32_e32 v3, v8
    132 ; GCN: v_mov_b32_e32 v4, v12
    133 ; GCN-NOT: s_endpgm
    134 attributes #2 = { "InitialPSInputAddr"="119" }
    135 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
        ; Pick both lanes of %4 and lane 0 of %7 and %8.
    136   %i0 = extractelement <2 x i32> %4, i32 0
    137   %i1 = extractelement <2 x i32> %4, i32 1
    138   %i2 = extractelement <2 x i32> %7, i32 0
    139   %i3 = extractelement <2 x i32> %8, i32 0
        ; Reinterpret the integers as floats so they can go into the float struct.
    140   %f0 = bitcast i32 %i0 to float
    141   %f1 = bitcast i32 %i1 to float
    142   %f2 = bitcast i32 %i2 to float
    143   %f3 = bitcast i32 %i3 to float
        ; Build the five-element return value; element 4 is the float argument %12.
    144   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
    145   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
    146   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
    147   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
    148   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
    149   ret {float, float, float, float, float} %r4
    150 }
    151 
    152 
    153 ; GCN: .long 165580
        ; The first value, 562 = 0x232 (bits 1, 4, 5, 9), lines up with the inputs
        ; the body reads (%4, %7, %8, %12) when %3 is counted as bit 0. The second
        ; value is 946 = 562 | 418, i.e. the same mask with the "InitialPSInputAddr"
        ; value of 418 (0x1A2, bits 1, 5, 7, 8) OR-ed in.
    154 ; GCN-NEXT: .long 562
    155 ; GCN-NEXT: .long 165584
    156 ; GCN-NEXT: .long 946
    157 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
        ; Return elements 0-2 must need no copy into v0-v2; elements 3 and 4 are
        ; expected to be copied from v4 and v8. The v8 source is consistent with the
        ; two extra float inputs named by the mask (bits 7 and 8) occupying v6-v7
        ; ahead of %12 (presumably; confirm).
    158 ; GCN-NOT: v_mov_b32_e32 v0
    159 ; GCN-NOT: v_mov_b32_e32 v1
    160 ; GCN-NOT: v_mov_b32_e32 v2
    161 ; GCN: v_mov_b32_e32 v3, v4
    162 ; GCN: v_mov_b32_e32 v4, v8
    163 ; GCN-NOT: s_endpgm
    164 attributes #3 = { "InitialPSInputAddr"="418" }
    165 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
        ; Pick both lanes of %4 and lane 0 of %7 and %8.
    166   %i0 = extractelement <2 x i32> %4, i32 0
    167   %i1 = extractelement <2 x i32> %4, i32 1
    168   %i2 = extractelement <2 x i32> %7, i32 0
    169   %i3 = extractelement <2 x i32> %8, i32 0
        ; Reinterpret the integers as floats so they can go into the float struct.
    170   %f0 = bitcast i32 %i0 to float
    171   %f1 = bitcast i32 %i1 to float
    172   %f2 = bitcast i32 %i2 to float
    173   %f3 = bitcast i32 %i3 to float
        ; Build the five-element return value; element 4 is the float argument %12.
    174   %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
    175   %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
    176   %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
    177   %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
    178   %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
    179   ret {float, float, float, float, float} %r4
    180 }
    181 
    182 
    183 ; GCN-LABEL: {{^}}sgpr:
        ; Vertex shader returning three i32 values in s0-s2: the second inreg
        ; argument plus 2, the first inreg argument, and the second inreg argument.
        ; The aggregate is now chained through %b (it used to restart from %a, which
        ; left %b dead and element 1 undefined), so the copy into s1 is checked too.
        ; The add into s0 and the copy into s1 may come in either order, but both
        ; must precede the copy that overwrites s2. This mirrors the SGPR
        ; expectations of the "both" test.
    184 ; GCN-DAG: s_add_i32 s0, s3, 2
        ; GCN-DAG: s_mov_b32 s1, s2
    185 ; GCN: s_mov_b32 s2, s3
    186 ; GCN-NOT: s_endpgm
    187 define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
    188   %x = add i32 %2, 2
    189   %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
    190   %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
    191   %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
    192   ret {i32, i32, i32} %c
    193 }
    194 
    195 
    196 ; GCN-LABEL: {{^}}sgpr_literal:
        ; Vertex shader returning the constants 5, 6, 7, 8 as four i32 values, which
        ; are expected in s0-s3. After s0 is materialised no redundant self-copy of
        ; s0 may appear, and no s_endpgm may follow the remaining moves.
        ; The unused "add" that previously opened the body was dead code and has
        ; been dropped; none of the arguments are read.
    197 ; GCN: s_mov_b32 s0, 5
    198 ; GCN-NOT: s_mov_b32 s0, s0
    199 ; GCN-DAG: s_mov_b32 s1, 6
    200 ; GCN-DAG: s_mov_b32 s2, 7
    201 ; GCN-DAG: s_mov_b32 s3, 8
    202 ; GCN-NOT: s_endpgm
    203 define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
    205   ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
    206 }
    207 
    208 
    209 ; GCN-LABEL: {{^}}both:
        ; Vertex shader mixing float and i32 return elements. Per the expectations
        ; the two floats come back in v0-v1 and the three integers in s0-s2.
        ; The input float is first copied from v0 to v1; the export, the float add,
        ; the integer add and the copy into s1 may then appear in any order, followed
        ; by the copy that overwrites s2 and the export wait. No s_endpgm may follow.
    210 ; GCN: v_mov_b32_e32 v1, v0
    211 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
    212 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
    213 ; GCN-DAG: s_add_i32 s0, s3, 2
    214 ; GCN-DAG: s_mov_b32 s1, s2
    215 ; GCN: s_mov_b32 s2, s3
    216 ; GCN: s_waitcnt expcnt(0)
    217 ; GCN-NOT: s_endpgm
    218 define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg %sa, i32 inreg %sb, float %vin) {
    219   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %vin, float %vin, float %vin, float %vin)
        ; One derived value per register class: float input + 1.0, integer input + 2.
    220   %vsum = fadd float %vin, 1.0
    221   %ssum = add i32 %sb, 2
        ; Interleave float and integer elements in the returned struct.
    222   %r0 = insertvalue {float, i32, float, i32, i32} undef, float %vsum, 0
    223   %r1 = insertvalue {float, i32, float, i32, i32} %r0, i32 %ssum, 1
    224   %r2 = insertvalue {float, i32, float, i32, i32} %r1, float %vin, 2
    225   %r3 = insertvalue {float, i32, float, i32, i32} %r2, i32 %sa, 3
    226   %r4 = insertvalue {float, i32, float, i32, i32} %r3, i32 %sb, 4
    227   ret {float, i32, float, i32, i32} %r4
    228 }
    229 
    230 
    231 ; GCN-LABEL: {{^}}structure_literal:
        ; Vertex shader that exports its float argument and returns a constant nested
        ; struct. Per the expectations the float leaves (1.0, 2.0, 4.0) land in v0-v2
        ; and the i32 leaves (2, 3) in s0-s1, all after the export and its wait.
        ; The trailing negative check for s_endpgm was missing here although every
        ; other test in this file has one; it is added for consistency.
    232 ; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
    233 ; GCN: s_waitcnt expcnt(0)
    234 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
    235 ; GCN-DAG: s_mov_b32 s0, 2
    236 ; GCN-DAG: s_mov_b32 s1, 3
    237 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
    238 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
        ; GCN-NOT: s_endpgm
    239 define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
    240   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
    241   ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
    242 }
    243