; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the sequence shl/zext/lshr performing the zeroextend
; as the last instruction of the sequence.
; This will help DAGCombiner to identify and then fold the sequence
; of shifts into a single AND.
; This transformation is profitable if the shift amounts are the same
; and if there is only one use of the zext.
;
; In the positive tests below the negative shift checks are placed on both
; sides of the 'and' match. A negative check only covers the region between
; its neighbouring positive matches, so checking only after the 'and' would
; let a shift emitted before it go unnoticed.

; i8 -> i16
define i16 @fun1(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i16
  %shl = shl i16 %ext, 4
  ret i16 %shl
}

; CHECK-LABEL: @fun1
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i32
define i32 @fun2(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; CHECK-LABEL: @fun2
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i32
define i32 @fun3(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; CHECK-LABEL: @fun3
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i64
define i64 @fun4(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun4
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i64
define i64 @fun5(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun5
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i32 -> i64
define i64 @fun6(i32 zeroext %v) {
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun6
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; Don't fold
; the pattern if we use arithmetic shifts.

; ashr on i8, widened to i64: both shifts are expected in the output.
define i64 @fun7(i8 zeroext %v) {
entry:
  %shifted = ashr i8 %v, 4
  %widened = zext i8 %shifted to i64
  %result = shl i64 %widened, 4
  ret i64 %result
}

; CHECK-LABEL: @fun7
; CHECK: sar
; CHECK: shl
; CHECK: ret

; ashr on i16, widened to i64.
define i64 @fun8(i16 zeroext %v) {
entry:
  %shifted = ashr i16 %v, 4
  %widened = zext i16 %shifted to i64
  %result = shl i64 %widened, 4
  ret i64 %result
}

; CHECK-LABEL: @fun8
; CHECK: sar
; CHECK: shl
; CHECK: ret

; ashr on i32, widened to i64.
define i64 @fun9(i32 zeroext %v) {
entry:
  %shifted = ashr i32 %v, 4
  %widened = zext i32 %shifted to i64
  %result = shl i64 %widened, 4
  ret i64 %result
}

; CHECK-LABEL: @fun9
; CHECK: sar
; CHECK: shl
; CHECK: ret

; The fold must also be skipped when the zext feeding the shift left has
; more than one use. In the next three functions the zext result is consumed
; by both the shl and the add.

; lshr on i8; the zext has two uses.
define i64 @fun10(i8 zeroext %v) {
entry:
  %shifted = lshr i8 %v, 4
  %widened = zext i8 %shifted to i64
  %result = shl i64 %widened, 4
  %sum = add i64 %result, %widened
  ret i64 %sum
}

; CHECK-LABEL: @fun10
; CHECK: shr
; CHECK: shl
; CHECK: ret

; lshr on i16; the zext has two uses.
define i64 @fun11(i16 zeroext %v) {
entry:
  %shifted = lshr i16 %v, 4
  %widened = zext i16 %shifted to i64
  %result = shl i64 %widened, 4
  %sum = add i64 %result, %widened
  ret i64 %sum
}

; CHECK-LABEL: @fun11
; CHECK: shr
; CHECK: shl
; CHECK: ret

; lshr on i32; the zext has two uses.
define i64 @fun12(i32 zeroext %v) {
entry:
  %shifted = lshr i32 %v, 4
  %widened = zext i32 %shifted to i64
  %result = shl i64 %widened, 4
  %sum = add i64 %result, %widened
  ret i64 %sum
}

; CHECK-LABEL: @fun12
; CHECK: shr
; CHECK: shl
; CHECK: ret

; PR17380
; Check that the combined DAGs remain legal when the DAGCombiner runs after
; legalization. The add instruction is redundant and raises the number of
; uses of the zext by one.
; This prevents the transformation from firing before the DAGs are legalized
; and optimized. Once the add has been removed, the zext is left with a single
; use and the DAGs get canonicalized. After legalization, the value type of
; the shift count must be legal.
; Also verify that the shl-shr sequence is folded correctly into an AND with
; a bitmask.

define void @g(i32 %a) {
  %shr = lshr i32 %a, 2
  %ext = zext i32 %shr to i64
  ; Result is never used: the add only gives the zext a second use.
  %unused = add i64 %ext, 1
  %shl = shl i64 %ext, 2
  tail call void @f(i64 %shl)
  ret void
}

; CHECK-LABEL: @g
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NEXT: jmp

declare void @f(i64)
