Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 
      4 ; Test expansion of scalar selects on vectors.
      5 ; Evergreen not enabled since it seems to be having problems with doubles.
      6 
      7 
      8 ; FUNC-LABEL: {{^}}select_v4i8:
      9 ; SI: v_cndmask_b32_e32
     10 ; SI: v_cndmask_b32_e32
     11 ; SI: v_cndmask_b32_e32
     12 ; SI: v_cndmask_b32_e32
     13 define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
     14   %c_is_zero = icmp eq i8 %c, 0 ; single i1 condition shared by every lane (note: %c is i8 here)
     15   %picked = select i1 %c_is_zero, <4 x i8> %a, <4 x i8> %b ; whole of %a when %c == 0, otherwise whole of %b
     16   store <4 x i8> %picked, <4 x i8> addrspace(1)* %out, align 4 ; write the chosen vector to %out
     17   ret void
     18 }
     19 
     20 ; FUNC-LABEL: {{^}}select_v4i16:
     21 ; SI: v_cndmask_b32_e32
     22 ; SI: v_cndmask_b32_e32
     23 ; SI: v_cndmask_b32_e32
     24 ; SI: v_cndmask_b32_e32
     25 define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
     26   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by every lane
     27   %picked = select i1 %c_is_zero, <4 x i16> %a, <4 x i16> %b ; whole of %a when %c == 0, otherwise whole of %b
     28   store <4 x i16> %picked, <4 x i16> addrspace(1)* %out, align 4 ; write the chosen vector to %out
     29   ret void
     30 }
     31 
     32 ; FIXME: Expansion with bitwise operations may be better if doing a
     33 ; vector select with SGPR inputs.
     34 
     35 ; FUNC-LABEL: {{^}}s_select_v2i32:
     36 ; SI: v_cndmask_b32_e32
     37 ; SI: v_cndmask_b32_e32
     38 ; SI: buffer_store_dwordx2
     39 define void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
     40   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by both lanes
     41   %picked = select i1 %c_is_zero, <2 x i32> %a, <2 x i32> %b ; both operands are function arguments passed by value
     42   store <2 x i32> %picked, <2 x i32> addrspace(1)* %out, align 8 ; 8-byte aligned store of the 8-byte result
     43   ret void
     44 }
     45 
     46 ; FUNC-LABEL: {{^}}s_select_v4i32:
     47 ; SI: v_cndmask_b32_e32
     48 ; SI: v_cndmask_b32_e32
     49 ; SI: v_cndmask_b32_e32
     50 ; SI: v_cndmask_b32_e32
     51 ; SI: buffer_store_dwordx4
     52 define void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
     53   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all four lanes
     54   %picked = select i1 %c_is_zero, <4 x i32> %a, <4 x i32> %b ; both operands are function arguments passed by value
     55   store <4 x i32> %picked, <4 x i32> addrspace(1)* %out, align 16 ; 16-byte aligned store of the 16-byte result
     56   ret void
     57 }
     58 
     59 ; FUNC-LABEL: {{^}}v_select_v4i32:
     60 ; SI: buffer_load_dwordx4
     61 ; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
     62 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
     63 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
     64 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
     65 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
     66 ; SI: buffer_store_dwordx4
     67 define void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
     68 bb:
     69   %in_range = icmp ult i32 %cond, 32 ; unsigned compare: true when %cond < 32
     70   %loaded = load <4 x i32>, <4 x i32> addrspace(1)* %in ; vector operand comes from memory rather than from an argument
     71   %result = select i1 %in_range, <4 x i32> %loaded, <4 x i32> zeroinitializer ; loaded vector when in range, else all zeros
     72   store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 16 ; write the chosen vector to %out
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}select_v8i32:
     77 ; SI: v_cndmask_b32_e32
     78 ; SI: v_cndmask_b32_e32
     79 ; SI: v_cndmask_b32_e32
     80 ; SI: v_cndmask_b32_e32
     81 ; SI: v_cndmask_b32_e32
     82 ; SI: v_cndmask_b32_e32
     83 ; SI: v_cndmask_b32_e32
     84 ; SI: v_cndmask_b32_e32
     85 define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
     86   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all eight lanes
     87   %picked = select i1 %c_is_zero, <8 x i32> %a, <8 x i32> %b ; whole of %a when %c == 0, otherwise whole of %b
     88   store <8 x i32> %picked, <8 x i32> addrspace(1)* %out, align 16 ; write the chosen vector to %out
     89   ret void
     90 }
     91 
     92 ; FUNC-LABEL: {{^}}s_select_v2f32:
     93 ; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
     94 ; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
     95 
     96 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
     97 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
     98 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
     99 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
    100 ; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
    101 
    102 ; SI: v_cndmask_b32_e32
    103 ; SI: v_cndmask_b32_e32
    104 ; SI: buffer_store_dwordx2
    105 define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
    106   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by both lanes
    107   %picked = select i1 %c_is_zero, <2 x float> %a, <2 x float> %b ; whole of %a when %c == 0, otherwise whole of %b
    108   store <2 x float> %picked, <2 x float> addrspace(1)* %out, align 16 ; NOTE(review): align 16 exceeds the 8-byte vector size; @s_select_v2i32 uses align 8 - confirm this is intended
    109   ret void
    110 }
    111 
    112 ; FUNC-LABEL: {{^}}s_select_v4f32:
    113 ; SI: s_load_dwordx4
    114 ; SI: s_load_dwordx4
    115 ; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
    116 
    117 ; SI: v_cndmask_b32_e32
    118 ; SI: v_cndmask_b32_e32
    119 ; SI: v_cndmask_b32_e32
    120 ; SI: v_cndmask_b32_e32
    121 
    122 ; SI: buffer_store_dwordx4
    123 define void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
    124   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all four lanes
    125   %picked = select i1 %c_is_zero, <4 x float> %a, <4 x float> %b ; both operands are function arguments passed by value
    126   store <4 x float> %picked, <4 x float> addrspace(1)* %out, align 16 ; 16-byte aligned store of the 16-byte result
    127   ret void
    128 }
    129 
    130 ; FUNC-LABEL: {{^}}v_select_v4f32:
    131 ; SI: buffer_load_dwordx4
    132 ; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
    133 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
    134 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
    135 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
    136 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
    137 ; SI: buffer_store_dwordx4
    138 define void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
    139 bb:
    140   %in_range = icmp ult i32 %cond, 32 ; unsigned compare: true when %cond < 32
    141   %loaded = load <4 x float>, <4 x float> addrspace(1)* %in ; vector operand comes from memory rather than from an argument
    142   %result = select i1 %in_range, <4 x float> %loaded, <4 x float> zeroinitializer ; loaded vector when in range, else all zeros
    143   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 ; write the chosen vector to %out
    144   ret void
    145 }
    146 
    147 ; FUNC-LABEL: {{^}}select_v8f32:
    148 ; SI: v_cndmask_b32_e32
    149 ; SI: v_cndmask_b32_e32
    150 ; SI: v_cndmask_b32_e32
    151 ; SI: v_cndmask_b32_e32
    152 ; SI: v_cndmask_b32_e32
    153 ; SI: v_cndmask_b32_e32
    154 ; SI: v_cndmask_b32_e32
    155 ; SI: v_cndmask_b32_e32
    156 define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
    157   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all eight lanes
    158   %picked = select i1 %c_is_zero, <8 x float> %a, <8 x float> %b ; whole of %a when %c == 0, otherwise whole of %b
    159   store <8 x float> %picked, <8 x float> addrspace(1)* %out, align 16 ; write the chosen vector to %out
    160   ret void
    161 }
    162 
    163 ; FUNC-LABEL: {{^}}select_v2f64:
    164 ; SI: v_cndmask_b32_e32
    165 ; SI: v_cndmask_b32_e32
    166 ; SI: v_cndmask_b32_e32
    167 ; SI: v_cndmask_b32_e32
    168 define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
    169   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by both 64-bit lanes
    170   %picked = select i1 %c_is_zero, <2 x double> %a, <2 x double> %b ; whole of %a when %c == 0, otherwise whole of %b
    171   store <2 x double> %picked, <2 x double> addrspace(1)* %out, align 16 ; write the chosen vector to %out
    172   ret void
    173 }
    174 
    175 ; FUNC-LABEL: {{^}}select_v4f64:
    176 ; SI: v_cndmask_b32_e32
    177 ; SI: v_cndmask_b32_e32
    178 ; SI: v_cndmask_b32_e32
    179 ; SI: v_cndmask_b32_e32
    180 ; SI: v_cndmask_b32_e32
    181 ; SI: v_cndmask_b32_e32
    182 ; SI: v_cndmask_b32_e32
    183 ; SI: v_cndmask_b32_e32
    184 define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
    185   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all four 64-bit lanes
    186   %picked = select i1 %c_is_zero, <4 x double> %a, <4 x double> %b ; whole of %a when %c == 0, otherwise whole of %b
    187   store <4 x double> %picked, <4 x double> addrspace(1)* %out, align 16 ; write the chosen vector to %out
    188   ret void
    189 }
    190 
    191 ; FUNC-LABEL: {{^}}select_v8f64:
    192 ; SI: v_cndmask_b32_e32
    193 ; SI: v_cndmask_b32_e32
    194 ; SI: v_cndmask_b32_e32
    195 ; SI: v_cndmask_b32_e32
    196 ; SI: v_cndmask_b32_e32
    197 ; SI: v_cndmask_b32_e32
    198 ; SI: v_cndmask_b32_e32
    199 ; SI: v_cndmask_b32_e32
    200 ; SI: v_cndmask_b32_e32
    201 ; SI: v_cndmask_b32_e32
    202 ; SI: v_cndmask_b32_e32
    203 ; SI: v_cndmask_b32_e32
    204 ; SI: v_cndmask_b32_e32
    205 ; SI: v_cndmask_b32_e32
    206 ; SI: v_cndmask_b32_e32
    207 ; SI: v_cndmask_b32_e32
    208 define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
    209   %c_is_zero = icmp eq i32 %c, 0 ; single i1 condition shared by all eight 64-bit lanes
    210   %picked = select i1 %c_is_zero, <8 x double> %a, <8 x double> %b ; whole of %a when %c == 0, otherwise whole of %b
    211   store <8 x double> %picked, <8 x double> addrspace(1)* %out, align 16 ; write the chosen vector to %out
    212   ret void
    213 }
    214 
    215 ; Function Attrs: nounwind readnone
    216 declare i32 @llvm.amdgcn.workitem.id.x() #1 ; NOTE(review): no function visible in this file calls this intrinsic - confirm the declaration is still needed
    217 
    218 attributes #0 = { nounwind } ; referenced by @v_select_v4i32 and @v_select_v4f32
    219 attributes #1 = { nounwind readnone } ; referenced by the @llvm.amdgcn.workitem.id.x declaration
    220