; (code-viewer export header) Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
      2 
      3 ; FIXME: Merge into indirect-addressing-si.ll
      4 
      5 ; Make sure that TwoAddressInstructions keeps src0 as subregister sub0
      6 ; of the tied implicit use and def of the super register.
      7 
      8 ; CHECK-LABEL: {{^}}insert_wo_offset:
      9 ; CHECK: s_load_dword [[IN:s[0-9]+]]
     10 ; CHECK: s_mov_b32 m0, [[IN]]
     11 ; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
     12 ; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
; A dynamic-index insertelement lowers to: copy the index into m0, then
; v_movreld_b32 writes relative to the base of the vector register tuple.
; The CHECK-NEXT ties the stored dwordx4's first register to [[ELT0]],
; i.e. the movreld destination must be sub0 of the stored super-register
; (the property described in the header comment above).
     13 define amdgpu_kernel void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
     14 entry:
     15   %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
     16   store <4 x float> %ins, <4 x float> addrspace(1)* %out
     17   ret void
     18 }
     19 
     20 ; Make sure we don't hit use of undefined register errors when expanding an
     21 ; extract with undef index.
     22 
     23 ; CHECK-LABEL: {{^}}extract_adjacent_blocks:
     24 ; CHECK: s_load_dword [[ARG:s[0-9]+]]
     25 ; CHECK: s_cmp_lg_u32
     26 ; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
     27 
; Fall-through block (bb1): each arm does a vector load, sets m0 for the
; variable-index extract, and uses v_movrels_b32 to read the element.
     28 ; CHECK: buffer_load_dwordx4
     29 ; CHECK: s_mov_b32 m0,
     30 ; CHECK: v_movrels_b32_e32
     31 
     32 ; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
     33 
; Taken branch (bb4): same load + m0 + movrels pattern in the second block.
     34 ; CHECK: [[BB4]]:
     35 ; CHECK: buffer_load_dwordx4
     36 ; CHECK: s_mov_b32 m0,
     37 ; CHECK: v_movrels_b32_e32
     38 
     39 ; CHECK: [[ENDBB]]:
     40 ; CHECK: buffer_store_dword
     41 ; CHECK: s_endpgm
     42 
; Both extractelements use an undef index; the test only requires that the
; movrels expansion does not trigger use-of-undefined-register verifier
; errors (see the comment above), not any particular extracted value.
     43 define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
     44 bb:
     45   %tmp = icmp eq i32 %arg, 0
     46   br i1 %tmp, label %bb1, label %bb4
     47 
     48 bb1:
; Volatile load + inline asm use keep this block (and the full vector
; value) alive through optimization, so each arm really emits its own
; buffer_load_dwordx4 / movrels sequence.
     49   %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
     50   %tmp3 = extractelement <4 x float> %tmp2, i32 undef
     51   call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
     52   br label %bb7
     53 
     54 bb4:
; Mirror of bb1 on the other side of the branch.
     55   %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
     56   %tmp6 = extractelement <4 x float> %tmp5, i32 undef
     57   call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
     58   br label %bb7
     59 
     60 bb7:
; Merge point: the phi forces both extracted values to reach the same
; volatile store, so neither arm can be folded away.
     61   %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
     62   store volatile float %tmp8, float addrspace(1)* undef
     63   ret void
     64 }
     65
     65