; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; This particular case will actually be worse in terms of code size
; from sinking into both.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret


; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008

; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = lshr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = ashr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is a legal type, we cannot sink the lshr into the BBs.
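; Context for the constants checked below (inferred from the checks, not part
; of the original comments): the s_bfe_u32 immediate packs (width << 16) | offset,
; so 0xc0004 selects a 12-bit field starting at bit 4. On the VI run the extract
; is therefore done once before the branch and each successor block only
; re-masks the result with s_and_b32.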

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff

; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f

; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  %shr = lshr i16 %arg1, 4
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i16 %shr, 255
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 30
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc1 BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 15
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: BB5_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 35
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }