; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone


; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: buffer_store_dword [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}
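
; Note: the shl/ashr pairs in these tests are the canonical sext_in_reg
; input pattern the DAG combiner is expected to match. The s_bfe source-1
; immediate packs the field as (width << 16) | offset, so 0x10000 above
; selects width 1 at offset 0, i.e. sign-extend from bit 0.
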
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
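
; Note: the i64 cases above use s_bfe_i64 with the same packed immediate
; encoding: 0x10000, 0x80000, 0x100000 and 0x200000 select field widths of
; 1, 8, 16 and 32 bits, all starting at bit 0 of the 64-bit source.
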
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}
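
; Note: when the operands are loaded into VGPRs, as above, sext_in_reg
; lowers to VALU instructions instead: v_bfe_i32 sign-extends the field in
; the low dword, and v_ashrrev_i32 by 31 rebuilds the high dword. The i32
; case needs no BFE at all, only the arithmetic shift of the low half.
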
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out
  ret void
}
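
; Note: 0x190001 follows from the mismatched shift amounts: shl 6 then
; ashr 7 keeps bits [25:1], sign-extended, so width = 32 - 7 = 25 (0x19)
; and offset = 7 - 6 = 1.
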
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
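
; Note: the vector cases legalize per element, so each lane gets its own
; s_bfe_i32 / s_sext_i32_i8 / s_sext_i32_i16, which is what the repeated
; check lines above count.
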
; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp2.5 = icmp sgt i32 %tmp2, 0
  %tmp3 = select i1 %tmp2.5, i32 %tmp2, i32 0
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_0_width:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
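
; Note: a zero-width BFE is expected to fold away entirely; the test above
; only asserts that no bfe instruction survives, not which value is stored.
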
; FUNC-LABEL: {{^}}bfe_8_bfe_8:
; SI: v_bfe_i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single BFE.
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
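
; Note: no bfe survives above because the 8-bit BFE already performed the
; sign extension, making the following shl/ashr by 24 a redundant
; sext_in_reg of the same field.
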
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: buffer_load_sbyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; SI: .text
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI-NOT: shr
; SI-NOT: shl
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: shl
; SI-NOT: shr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure we propagate the VALUness to users of a moved scalar BFE.

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63

  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}