; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s

; Tests for AMDGPU inline assembly handling: divergence, physical register
; constraints, immediate/i1 inputs, and code-size accounting (every inline-asm
; instruction is conservatively counted at the maximum encoding size).

; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
entry:
  store i32 5, i32 addrspace(1)* %out
  call void asm sideeffect "s_endpgm", ""()
  ret void
}

; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
entry:
  call void asm sideeffect "s_endpgm", ""()
  ret void
}


; CHECK-LABEL: {{^}}branch_on_asm:
; Make sure inline assembly is treated as divergent.
; CHECK: s_mov_b32 s{{[0-9]+}}, 0
; CHECK: s_and_saveexec_b64
define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
  %zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
  %cmp = icmp eq i32 %zero, 0
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %sgpr, i64 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}

; All inlineasm instructions are assumed to be the maximum size
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    ; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
    ; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64 ; inline comment
    ; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
    ; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
   ", ""()
  ret void
}

; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK: s_mov_b32 s1, 0
; CHECK: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
  ret void
}

; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
; CHECK: v_mov_b32_e32 v0, -1{{$}}
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
  ret void
}

; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]]
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]]
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: ; use v0
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
  %val = load i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
  ret void
}

; FIXME: Should be scheduled to shrink vcc
; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc
define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
  %val0 = load volatile i1, i1 addrspace(1)* undef
  %val1 = load volatile i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
  ret void
}

; NOTE: 'muliple' typo is preserved; the CHECK-LABEL below matches it.
; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %add = shl i32 %def0, %def1
  store i32 %add, i32 addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}asm_constraint_c_n:
; CHECK: s_trap 10{{$}}
define amdgpu_kernel void @asm_constraint_c_n() {
entry:
  tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
  ret void
}

; CHECK-LABEL: {{^}}asm_constraint_n_n:
; CHECK: s_trap -10{{$}}
define amdgpu_kernel void @asm_constraint_n_n() {
entry:
  tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
  ret void
}