; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests and all tests involving loads dropped.

; Patterns:
;   a) x & ((1 << nbits) - 1)
;   b) x & ~(-1 << nbits)
;   c) x & (-1 >> (32 - nbits))
;   d) x << (32 - nbits) >> (32 - nbits)
; are equivalent. Every right shift above is a logical shift (lshr in the IR).
;
; Per the assertions below, each variant is expected to become a single
; v_bfe_u32, except the c1/d1 "indexzext" variants: those do the subtraction
; in i8 before widening, and are checked separately for the two llc
; invocations.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern a: mask = (1 << numlowbits) - 1, mask is the first
; operand of the 'and'.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a with the bit count passed as a zeroext i8 and widened to i32
; before the shift.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a1_indexzext:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a with the operands of the 'and' swapped relative to bzhi32_a0.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a4_commutative:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern b: mask = ~(-1 << numlowbits), with the bitwise NOT
; written as xor with -1.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b with the bit count passed as a zeroext i8 and widened to i32
; before the shift.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b1_indexzext:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b with the operands of the 'and' swapped relative to bzhi32_b0.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b4_commutative:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern c: mask = -1 >> (32 - numlowbits), using a logical
; shift right.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern c where the bit count is a plain i8 (no zeroext attribute) and the
; "32 - numlowbits" subtraction is done in i8 before being zero-extended to
; the i32 shift amount. The assertions for this function contain no
; v_bfe_u32; the two llc invocations are checked under separate prefixes.
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c1_indexzext:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c1_indexzext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u16_e32 v1, 32, v1
; VI-NEXT: v_mov_b32_e32 v2, -1
; VI-NEXT: v_lshrrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; VI-NEXT: v_and_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern c with the operands of the 'and' swapped relative to bzhi32_c0.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c4_commutative:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit
; ---------------------------------------------------------------------------- ;

; Baseline for pattern d: shift the value left by (32 - numlowbits), then
; logically shift it right by the same amount; no explicit mask or 'and'.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_d0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Pattern d where the bit count is a plain i8 (no zeroext attribute) and the
; "32 - numlowbits" subtraction is done in i8 before being zero-extended to
; the i32 shift amount used by both shifts. The assertions for this function
; contain no v_bfe_u32; the two llc invocations are checked under separate
; prefixes.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d1_indexzext:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
; SI-NEXT: v_lshl_b32_e32 v0, v0, v1
; SI-NEXT: v_lshr_b32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d1_indexzext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u16_e32 v1, 32, v1
; VI-NEXT: v_and_b32_e32 v1, 0xff, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}