; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests, and tests with loads dropped.

; Every kernel below keeps only the low `nbits` bits of `x` and stores the
; result through %out. In the IR, `x` is %val and `nbits` is %numlowbits;
; `>>` denotes a logical shift right (lshr).
;
; Patterns:
;   a) x & ((1 << nbits) - 1)
;   b) x & ~(-1 << nbits)
;   c) x & (-1 >> (32 - nbits))
;   d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern a: the mask is built as (1 << %numlowbits) - 1 and then
; and-ed with %val. On both targets the recorded output contains no separate
; shift/add/and for the mask, only a single BFE_UINT (plus the LSHR by 2 of
; KC0[2].W, presumably the store address derived from %out).
define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern a with the bit count passed as a `zeroext` i8 and widened with zext
; before the shift. The recorded output fetches the byte with VTX_READ_8 and
; runs it through BFE_INT (literal 8) before it feeds BFE_UINT.
; NOTE(review): BFE_INT looks like a signed extract even though the argument is
; marked zeroext; the checks simply record current output - confirm whether
; that lowering is intended.
define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_a1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern a with the operands of the final `and` swapped (%val first). The
; recorded output is the same instruction sequence as for bzhi32_a0, i.e. the
; operand order of the `and` does not affect the result.
define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern b: the mask is ~(-1 << %numlowbits), with the inversion
; written as xor with -1. The recorded output is again a single BFE_UINT with
; no separate shift/xor/and.
define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern b with the bit count passed as a `zeroext` i8 and widened with zext.
; The recorded output has the same shape as for bzhi32_a1_indexzext:
; VTX_READ_8, then BFE_INT (literal 8), then BFE_UINT.
define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_b1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern b with the operands of the final `and` swapped (%val first). The
; recorded output is the same instruction sequence as for bzhi32_b0.
define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern c: the mask is -1 logically shifted right by
; (32 - %numlowbits). The recorded output is a single BFE_UINT with no
; separate sub/shift/and.
define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern c with an i8 bit count: the subtraction from 32 is done in i8 and
; only the result is zero-extended. Unlike the a1/b1 variants, %numlowbits
; carries no zeroext attribute here. The recorded output does NOT contain
; BFE_UINT; it stays as SUB_INT from 32, AND_INT with 255 (presumably the zext
; of the i8 result), LSHR of -1 by that amount, and AND_INT with the value.
define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR * T0.W, literal.x, PV.W,
; EG-NEXT:    -1(nan), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.X, PV.W, KC0[2].Y,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_c1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT:    LSHR * T0.W, literal.x, PV.W,
; CM-NEXT:    -1(nan), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.X, PV.W, KC0[2].Y,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern c with the operands of the final `and` swapped (%val first). The
; recorded output is the same instruction sequence as for bzhi32_c0.
define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c4_commutative:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c4_commutative:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern d: no explicit mask; %val is shifted left and then
; logically shifted right by the same (32 - %numlowbits). The recorded output
; is a single BFE_UINT with no separate sub/shl/lshr on the value.
define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d0:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_d0:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:    LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; Pattern d with an i8 bit count: the subtraction from 32 is done in i8 and
; only the result is zero-extended (no zeroext attribute on %numlowbits). The
; recorded output does NOT contain BFE_UINT; it stays as SUB_INT from 32,
; AND_INT with 255 (presumably the zext of the i8 result), then LSHL and LSHR
; of the value by that amount.
define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d1_indexzext:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    LSHL * T1.W, KC0[2].Y, PV.W,
; EG-NEXT:    LSHR T0.X, PV.W, T0.W,
; EG-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_d1_indexzext:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, 0.0,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT:    LSHL * T1.W, KC0[2].Y, PV.W,
; CM-NEXT:    LSHR * T0.X, PV.W, T0.W,
; CM-NEXT:    LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}