Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
      2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
      3 
      4 ; Tests for indirect addressing on SI, which is implemented using dynamic
      5 ; indexing of vectors.
      6 
      7 ; CHECK-LABEL: {{^}}extract_w_offset:
        ; Dynamic extract from a constant <4 x float> at index %in + 1. The four
        ; elements are expected in VGPRs (3.0 is printed as 0x40400000), after which
        ; m0 is written and v_movrels_b32 has to be the very next instruction.
      8 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
      9 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
     10 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
     11 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
     12 ; CHECK: s_mov_b32 m0
     13 ; CHECK-NEXT: v_movrels_b32_e32
     14 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
     15 entry:
     16   %index = add i32 %in, 1
     17   %value = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %index
     18   store float %value, float addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; XXX: Could do v_or_b32 directly
        ; The indexed vector is the <4 x i32> argument OR'd with constants. The test
        ; expects four scalar s_or_b32, four copies of the results from SGPRs into
        ; VGPRs, and then the m0 write immediately followed by v_movrels_b32.
     23 ; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector:
     24 ; CHECK-DAG: s_or_b32
     25 ; CHECK-DAG: s_or_b32
     26 ; CHECK-DAG: s_or_b32
     27 ; CHECK-DAG: s_or_b32
     28 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
     29 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
     30 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
     31 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
     32 ; CHECK: s_mov_b32 m0
     33 ; CHECK-NEXT: v_movrels_b32_e32
     34 define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
     35 entry:
     36   %index = add i32 %in, 1
     37   %or = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
     38   %value = extractelement <4 x i32> %or, i32 %index
     39   store i32 %value, i32 addrspace(1)* %out
     40   ret void
     41 }
     42 
     43 ; CHECK-LABEL: {{^}}extract_wo_offset:
        ; Same constant vector as extract_w_offset, but indexed by %in directly
        ; (no added constant). The expected instruction sequence is identical.
     44 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
     45 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
     46 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
     47 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
     48 ; CHECK: s_mov_b32 m0
     49 ; CHECK-NEXT: v_movrels_b32_e32
     50 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
     51 entry:
     52   %value = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
     53   store float %value, float addrspace(1)* %out
     54   ret void
     55 }
     56 
     57 ; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
     58 ; The offset depends on the register that holds the first element of the vector.
        ; The IR offset -512 is 0xfffffe00, so only the 0xfffffe prefix is pinned and
        ; the trailing hex digits are left free. The destination VGPR number is
        ; matched with [0-9]+ so the test does not depend on a single-digit register.
     59 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-f]+}}
     60 ; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0
     61 define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
     62 entry:
     63   %index = add i32 %offset, -512
     64   %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
     65   store i32 %value, i32 addrspace(1)* %out
     66   ret void
     67 }
     68 
     69 ; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
     70 ; The offset depends on the register that holds the first element of the vector.
        ; Here the indexed vector is the OR of two <4 x i32> arguments instead of a
        ; constant. The IR offset -512 is 0xfffffe00, so only the 0xfffffe prefix is
        ; pinned; the destination VGPR number is matched with [0-9]+ so the test does
        ; not depend on a single-digit register.
     71 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-f]+}}
     72 ; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0
     73 define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
     74 entry:
     75   %index = add i32 %offset, -512
     76   %or = or <4 x i32> %vec0, %vec1
     77   %value = extractelement <4 x i32> %or, i32 %index
     78   store i32 %value, i32 addrspace(1)* %out
     79   ret void
     80 }
     81 
     82 ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
     83 ; The offset depends on the register that holds the first element of the vector.
        ; The index is derived from the result of @llvm.r600.read.tidig.x rather than
        ; from a kernel argument. The test expects a v_readfirstlane_b32, the -512
        ; (0xfffffe00-based) offset added to m0 in place, v_movrels_b32 as the very
        ; next instruction, and a later s_cbranch_execnz. The destination VGPR number
        ; is matched with [0-9]+ so the test does not depend on a single-digit register.
     84 ; CHECK: v_readfirstlane_b32
     85 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-f]+}}
     86 ; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]+}}, v0
     87 ; CHECK: s_cbranch_execnz
     88 define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
     89 entry:
     90   %id = call i32 @llvm.r600.read.tidig.x() #1
     91   %index = add i32 %id, -512
     92   %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
     93   store i32 %value, i32 addrspace(1)* %out
     94   ret void
     95 }
     96 
     97 ; CHECK-LABEL: {{^}}insert_w_offset:
        ; Dynamic insert of 5.0 into a constant <4 x float> at index %in + 1; element 2
        ; of the result is then stored. Expects the m0 write to be immediately
        ; followed by v_movreld_b32.
     98 ; CHECK: s_mov_b32 m0
     99 ; CHECK-NEXT: v_movreld_b32_e32
    100 define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
    101 entry:
    102   %index = add i32 %in, 1
    103   %vec = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %index
    104   %elt = extractelement <4 x float> %vec, i32 2
    105   store float %elt, float addrspace(1)* %out
    106   ret void
    107 }
    108 
    109 ; CHECK-LABEL: {{^}}insert_wo_offset:
        ; Dynamic insert of 5.0 into a constant <4 x float> at index %in (no added
        ; constant); element 2 of the result is then stored. Expects the m0 write to
        ; be immediately followed by v_movreld_b32.
    110 ; CHECK: s_mov_b32 m0
    111 ; CHECK-NEXT: v_movreld_b32_e32
    112 define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
    113 entry:
    114   %vec = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
    115   %elt = extractelement <4 x float> %vec, i32 2
    116   store float %elt, float addrspace(1)* %out
    117   ret void
    118 }
    119 
    120 ; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
    121 ; The offset depends on the register that holds the first element of the vector.
        ; Inserts the constant 5 into a constant <4 x i32> at index %offset - 512 and
        ; stores the whole vector. -512 is 0xfffffe00, so only the 0xfffffe prefix is
        ; pinned; the source VGPR number is matched with [0-9]+ so the test does not
        ; depend on a single-digit register.
    122 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-f]+}}
    123 ; CHECK: v_movreld_b32_e32 v0, v{{[0-9]+}}
    124 define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
    125 entry:
    126   %index = add i32 %offset, -512
    127   %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
    128   store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    129   ret void
    130 }
    131 
    132 ; The vector indexed into is originally loaded into an SGPR rather
    133 ; than built with a reg_sequence
    134
    135 ; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
    136 ; The offset depends on the register that holds the first element of the vector.
        ; Inserts the constant 5 into the <4 x i32> argument %vec at index
        ; %offset - 512 and stores the whole vector. -512 is 0xfffffe00, so only the
        ; 0xfffffe prefix is pinned; the source VGPR number is matched with [0-9]+ so
        ; the test does not depend on a single-digit register.
    137 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-f]+}}
    138 ; CHECK: v_movreld_b32_e32 v0, v{{[0-9]+}}
    139 define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
    140 entry:
    141   %index = add i32 %offset, -512
    142   %value = insertelement <4 x i32> %vec, i32 5, i32 %index
    143   store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    144   ret void
    145 }
    146 
    147 ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
    148 ; The offset depends on the register that holds the first element of the vector.
        ; The index is derived from the result of @llvm.r600.read.tidig.x rather than
        ; from a kernel argument. The test expects a v_readfirstlane_b32, the -512
        ; (0xfffffe00-based) offset added to m0 in place, v_movreld_b32 as the very
        ; next instruction, and a later s_cbranch_execnz. The source VGPR number is
        ; matched with [0-9]+ so the test does not depend on a single-digit register.
    149 ; CHECK: v_readfirstlane_b32
    150 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-f]+}}
    151 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]+}}
    152 ; CHECK: s_cbranch_execnz
    153 define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
    154 entry:
    155   %id = call i32 @llvm.r600.read.tidig.x() #1
    156   %index = add i32 %id, -512
    157   %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
    158   store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    159   ret void
    160 }
    161 
    162 ; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
    163 ; The offset depends on the register that holds the first element of the vector.
        ; Variant of insert_neg_offset_vgpr with a base offset of -16 instead of -512;
        ; the test expects the m0 adjustment to be printed as a negative decimal
        ; immediate. The source VGPR number is matched with [0-9]+ so the test does
        ; not depend on a single-digit register.
    164 ; CHECK: v_readfirstlane_b32
    165 ; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
    166 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]+}}
    167 ; CHECK: s_cbranch_execnz
    168 define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
    169 entry:
    170   %id = call i32 @llvm.r600.read.tidig.x() #1
    171   %index = add i32 %id, -16
    172   %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
    173   store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    174   ret void
    175 }
    176 
    177 declare i32 @llvm.r600.read.tidig.x() #1 ; called by the *_vgpr tests as the source of the element index
    178 attributes #1 = { nounwind readnone } ; attribute group used on the declaration above and on its call sites
    179