; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Shader functions (amdgpu_vs / amdgpu_ps) that return a value instead of
; returning void. The checks below expect float results to be placed in v0, v1,
; ... and i32 results in s0, s1, ..., and (for all tests except
; structure_literal) that no s_endpgm is emitted.

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Two floats returned after an export. The input float arrives in v0, which is
; also the first return register, so the checks expect it to be copied to v1
; before v0 is overwritten with the fadd result.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %x = fadd float %3, 1.0
  %a = insertvalue {float, float} undef, float %x, 0
  %b = insertvalue {float, float} %a, float %3, 1
  ret {float, float} %b
}

; Constant struct of four floats returned after an export. The export reads v0,
; so the checks expect the wait on the export counter before the constants are
; written into v0..v3.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
; GCN: s_waitcnt expcnt(0)
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}


; The pixel-shader tests below each check two (key, value) pairs of .long
; directives that precede the function label: key 165580 (0x286CC) and key
; 165584 (0x286D0).
; NOTE(review): these look like the SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR
; register offsets -- confirm against the target's register definitions.
; In every test the second value equals the first value OR'd with the
; function's "InitialPSInputAddr" attribute (e.g. 562 | 119 = 631).

; InitialPSInputAddr = 0: both values are 562.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
attributes #0 = { "InitialPSInputAddr"="0" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; No argument is read by the function body; both values are expected to be 1.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ret float 1.0
}


; Only arguments %8 and %14 are read; both values are expected to be 2081.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %f = bitcast <2 x i32> %8 to <2 x float>
  %s = insertvalue {float, <2 x float>} undef, float %14, 0
  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
  ret {float, <2 x float>} %s1
}


; InitialPSInputAddr = 1: second value is 562 | 1 = 563.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; InitialPSInputAddr = 119: second value is 562 | 119 = 631.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; InitialPSInputAddr = 418: second value is 562 | 418 = 946.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Three i32 values returned; the checks expect them in scalar registers.
; NOTE(review): %c is built from %a rather than %b, so %b is unused and
; element 1 of the returned struct stays undef. The checks only cover s0 and
; s2, which matches that -- confirm this is intentional before changing it.
; GCN-LABEL: {{^}}sgpr:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
  %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
  %c = insertvalue {i32, i32, i32} %a, i32 %2, 2
  ret {i32, i32, i32} %c
}


; Constant struct of four i32 values; the checks expect them in s0..s3.
; NOTE(review): %x is never used by the return value -- presumably deliberate
; dead code; confirm.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}


; Mixed float and i32 struct returned after an export: the checks expect the
; float elements in v0/v1 and the i32 elements in s0..s2.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %v = fadd float %3, 1.0
  %s = add i32 %2, 2
  %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
  %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
  %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
  %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
  %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
  ret {float, i32, float, i32, i32} %a4
}


; Nested constant struct containing a vector: the checks expect the float and
; <2 x float> elements in v0..v2 and the i32 elements in s0/s1.
; GCN-LABEL: {{^}}structure_literal:
; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
; GCN: s_waitcnt expcnt(0)
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}