; (code-browser navigation header, preserved as a comment: Home | History | Annotate | Download | only in AMDGPU)
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
      3 
; A kernel whose body contains a side-effecting inline-asm "s_endpgm" in
; addition to the compiler-generated end-of-program instruction, so two
; s_endpgm instructions must appear in the output.
; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
entry:
  store i32 5, i32 addrspace(1)* %out
  ; sideeffect prevents the no-output asm from being deleted as dead.
  call void asm sideeffect "s_endpgm", ""()
  ret void
}
     13 
; Same check as above but for the amdgpu_ps (pixel shader) calling
; convention: the inline-asm s_endpgm plus the compiler's own.
; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
entry:
  call void asm sideeffect "s_endpgm", ""()
  ret void
}
     22 
     23 
     24 ; CHECK: {{^}}branch_on_asm:
     25 ; Make sure inline assembly is treted as divergent.
     26 ; CHECK: s_mov_b32 s{{[0-9]+}}, 0
     27 ; CHECK: s_and_saveexec_b64
     28 define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
     29 	%zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
     30 	%cmp = icmp eq i32 %zero, 0
     31 	br i1 %cmp, label %if, label %endif
     32 
     33 if:
     34 	store i32 0, i32 addrspace(1)* %out
     35 	br label %endif
     36 
     37 endif:
     38   ret void
     39 }
     40 
; Inline asm with an "=s" i64 output: the v_cmp result lands in an SGPR
; pair, and storing it to memory forces copies into a VGPR pair first.
; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  ; "=s,v": 64-bit result in an SGPR pair, input %in supplied in a VGPR.
  %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %sgpr, i64 addrspace(1)* %out
  ret void
}
     52 
; Code-size estimation of inline asm: one asm instruction is counted at
; the maximum instruction size (presumably 8 bytes — see the comment on
; the next test), plus the 4-byte s_endpgm, giving 12.
; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}
     60 
; All inlineasm instructions are assumed to be the maximum size
; (even v_nop_e32, which has a shorter encoding), so the estimate is the
; same 12 bytes as the v_nop_e64 case above.
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}
     69 
; Each newline-separated instruction in the asm string is counted
; separately: two estimated instructions plus s_endpgm gives 20.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}
     80 
; A blank line between the two instructions must not be counted as an
; instruction: same 20-byte estimate as the previous test.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}
     92 
; An empty asm string contributes nothing; only the 4-byte s_endpgm
; remains in the size estimate.
; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "", ""()
  ret void
}
    100 
; An asm string containing only a comment must be counted as zero bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment", ""()
  ret void
}
    108 
; A leading newline before the comment still counts as zero instructions.
; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
; comment", ""()
  ret void
}
    117 
; A comment followed by a trailing newline also counts as zero instructions.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
", ""()
  ret void
}
    126 
; Two comments on one line (separated by " ; ") are still only comments:
; zero instructions counted.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}
    134 
; Same as above but with no space before the second ';' — still zero
; instructions.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}
    142 
; Mixed comment-only lines, trailing comments, and blank lines around two
; real instructions: only the two v_nop_e64 count (2 estimated + s_endpgm
; = 20).
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}
    157 
; Same as mixed_comments0 but the asm string begins directly with an
; instruction (with a trailing comment) rather than a comment line.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}
    171 
; Instructions with operands (not just nops) mixed with comments: the two
; real instructions are still counted as 2 estimated + s_endpgm = 20.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}
    186 
; A 64-bit immediate tied to the fixed VGPR pair {v[0:1]}: the constant
; 123456 (= 0x1e240) is currently materialized in SGPRs and then copied to
; v0/v1, hence the FIXME.
; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK: s_mov_b32 s1, 0
; CHECK: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
  ret void
}
    199 
; An i1 immediate true passed in a fixed VGPR is materialized as the
; all-ones value -1 in v0.
; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
; CHECK: v_mov_b32_e32 v0, -1{{$}}
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
  ret void
}
    208 
; An i1 loaded from memory and passed in a fixed VGPR: the loaded byte is
; masked to bit 0, compared against 1, and re-materialized as a 0/-1 mask
; value in v0 before the asm uses it.
; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]]
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]]
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: ; use v0
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
  %val = load i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
  ret void
}
    221 
; Two loaded i1 values pinned to two different physical VGPRs (v0, v1);
; each goes through the same compare/select re-materialization.
; FIXME: Should be scheduled to shrink vcc
; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc
define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
  ; volatile keeps the two loads distinct and ordered.
  %val0 = load volatile i1, i1 addrspace(1)* undef
  %val1 = load volatile i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
  ret void
}
    235 
; Two asm defs pinned to the same physical register v0: the first result
; must be copied out of v0 (to v1) before the second def clobbers it.
; NOTE(review): function name "muliple" [sic] is kept to match the
; CHECK-LABEL; %add actually holds a shift, not an add.
; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %add = shl i32 %def0, %def1
  store i32 %add, i32 addrspace(1)* undef
  ret void
}
    249 
; The ${0:c} template modifier prints the immediate constant directly
; (constraint "n" guarantees a known integer), producing "s_trap 10".
; NOTE(review): the call references attribute group #1, which is not
; defined in this chunk — presumably declared elsewhere in the file.
; CHECK-LABEL: {{^}}asm_constraint_c_n:
; CHECK: s_trap 10{{$}}
define amdgpu_kernel void @asm_constraint_c_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
  ret void
}
    257 
; The ${0:n} template modifier prints the negated immediate, so the
; input 10 is emitted as "s_trap -10".
; NOTE(review): attribute group #1 is referenced but not defined in this
; chunk — presumably declared elsewhere in the file.
; CHECK-LABEL: {{^}}asm_constraint_n_n:
; CHECK: s_trap -10{{$}}
define amdgpu_kernel void @asm_constraint_n_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
  ret void
}
    265