; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone


; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: buffer_store_dword [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
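
; In the s_bfe source-1 immediates used in these tests, the value encodes
; (width << 16) | offset: 0x10000 above is a 1-bit field at offset 0, and
; 0x80000, 0x100000 and 0x200000 below select 8-, 16- and 32-bit fields.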
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT

; EG: ASHR [[RES_HI]]

; EG: LSHR
; EG: LSHR
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }
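
; The v_sext_in_reg_* variants below operate on loaded values, so the sign
; extension is done on the VALU: v_bfe_i32 produces the low half and a 31-bit
; v_ashrrev_i32 of it produces the high half, rather than the scalar
; s_bfe_i64 used above.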
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}
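
; No bfe is needed to sign extend i32 within i64: the low dword of the result
; is the shifted value itself, and the high dword is just (lo >> 31).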
; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}
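
; (x << 6) >> 7 is a signed extract of the 25-bit field starting at bit 1, so
; it still becomes a single s_bfe; the operand 0x190001 encodes width 25
; (0x19) at offset 1.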
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}


; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
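
; With VGPR (loaded) sources, each vector element is sign-extended with its
; own v_bfe_i32 rather than the scalar s_sext/s_bfe forms used above.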
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}
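
; The remaining tests exercise the llvm.AMDGPU.bfe.i32 intrinsic directly;
; its operands are (src, offset, width).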
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_0_width:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_8:
; SI: v_bfe_i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into 1
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: buffer_load_sbyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; SI: .text
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI-NOT: shr
; SI-NOT: shl
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
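
; The i2 sext_in_reg below only changes bits above bit 1, and the bfe reads
; only bit 1, so the shl/ashr pair folds away into a single v_bfe_i32 at
; offset 1.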
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: shl
; SI-NOT: shr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure we propagate the VALUness to users of a moved scalar BFE.

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63

  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}