; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Test expansion of scalar selects on vectors.
; Evergreen is not enabled because it appears to have problems with doubles.
;
; Every function below picks one of two vector values with a single i1
; condition and stores the chosen vector to %out. The checks count one
; v_cndmask_b32 per 32-bit piece of the result (per element for the i8 and
; i16 cases).


; <4 x i8> select on (%c == 0); four conditional moves are expected.
; FUNC-LABEL: {{^}}select_v4i8:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
  %is_zero = icmp eq i8 %c, 0
  %chosen = select i1 %is_zero, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %chosen, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; <4 x i16> select on (%c == 0); four conditional moves are expected.
; FUNC-LABEL: {{^}}select_v4i16:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %chosen, <4 x i16> addrspace(1)* %out, align 4
  ret void
}

; FIXME: Expansion with bitwise operations may be better if doing a
; vector select with SGPR inputs.

; <2 x i32> select with both operands passed as arguments; two conditional
; moves followed by a single 64-bit store are expected.
; FUNC-LABEL: {{^}}s_select_v2i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %chosen, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> select with both operands passed as arguments; four conditional
; moves followed by a single 128-bit store are expected.
; FUNC-LABEL: {{^}}s_select_v4i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx4
define void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %chosen, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <4 x i32> select between a value loaded from %in and zeroinitializer,
; conditioned on (%cond < 32, unsigned). The checks require the compare to
; write vcc and each conditional move to take the constant 0 as an operand.
; FUNC-LABEL: {{^}}v_select_v4i32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 {
entry:
  %below_32 = icmp ult i32 %cond, 32
  %loaded = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %chosen = select i1 %below_32, <4 x i32> %loaded, <4 x i32> zeroinitializer
  store <4 x i32> %chosen, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> select on (%c == 0); eight conditional moves are expected.
; FUNC-LABEL: {{^}}select_v8i32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %chosen, <8 x i32> addrspace(1)* %out, align 16
  ret void
}

; <2 x float> select with both operands passed as arguments. The checks
; capture the scalar register pairs that %a and %b are loaded into, require
; each half to be copied to a vector register, and then expect two
; conditional moves and a 64-bit store.
; NOTE(review): the two alternatives in each load offset pattern presumably
; correspond to the two llc invocations at the top of the file - confirm.
; FUNC-LABEL: {{^}}s_select_v2f32:
; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}

; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <2 x float> %a, <2 x float> %b
  store <2 x float> %chosen, <2 x float> addrspace(1)* %out, align 16
  ret void
}

; <4 x float> select with both operands passed as arguments; two 128-bit
; scalar loads, a compare into vcc, four conditional moves and a 128-bit
; store are expected.
; FUNC-LABEL: {{^}}s_select_v4f32:
; SI: s_load_dwordx4
; SI: s_load_dwordx4
; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}

; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32

; SI: buffer_store_dwordx4
define void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <4 x float> %a, <4 x float> %b
  store <4 x float> %chosen, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <4 x float> select between a value loaded from %in and zeroinitializer,
; conditioned on (%cond < 32, unsigned). Same expected shape as the
; <4 x i32> variant of this test.
; FUNC-LABEL: {{^}}v_select_v4f32:
; SI: buffer_load_dwordx4
; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; SI: buffer_store_dwordx4
define void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 {
entry:
  %below_32 = icmp ult i32 %cond, 32
  %loaded = load <4 x float>, <4 x float> addrspace(1)* %in
  %chosen = select i1 %below_32, <4 x float> %loaded, <4 x float> zeroinitializer
  store <4 x float> %chosen, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float> select on (%c == 0); eight conditional moves are expected.
; FUNC-LABEL: {{^}}select_v8f32:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <8 x float> %a, <8 x float> %b
  store <8 x float> %chosen, <8 x float> addrspace(1)* %out, align 16
  ret void
}

; <2 x double> select on (%c == 0); four conditional moves are expected
; (two per 64-bit element).
; FUNC-LABEL: {{^}}select_v2f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <2 x double> %a, <2 x double> %b
  store <2 x double> %chosen, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; <4 x double> select on (%c == 0); eight conditional moves are expected.
; FUNC-LABEL: {{^}}select_v4f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <4 x double> %a, <4 x double> %b
  store <4 x double> %chosen, <4 x double> addrspace(1)* %out, align 16
  ret void
}

; <8 x double> select on (%c == 0); sixteen conditional moves are expected.
; FUNC-LABEL: {{^}}select_v8f64:
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e32
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) #0 {
  %is_zero = icmp eq i32 %c, 0
  %chosen = select i1 %is_zero, <8 x double> %a, <8 x double> %b
  store <8 x double> %chosen, <8 x double> addrspace(1)* %out, align 16
  ret void
}

; Function Attrs: nounwind readnone
; NOTE(review): no function visible in this file calls this intrinsic; it is
; kept as-is in case something outside this view depends on it.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is applied to every function defined above; #1 only to the declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }