; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
; This file tests the different cases that are involved when codegen prepare
; tries to get sign extension out of the way of addressing mode.
; These tests require an actual target as addressing mode decisions depend
; on the target.

target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"


; Check that we correctly promote both operands of the promotable add.
; CHECK-LABEL: @twoArgsPromotion
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg1 to i64
; CHECK: [[ARG2SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg2 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], [[ARG2SEXT]]
; CHECK: inttoptr i64 [[PROMOTED]] to i8*
; CHECK: ret
define i8 @twoArgsPromotion(i32 %arg1, i32 %arg2) {
  %add = add nsw i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %base = inttoptr i64 %sextadd to i8*
  %res = load i8* %base
  ret i8 %res
}

; Check that we do not promote both operands of the promotable add when
; the instruction will not be folded into the addressing mode.
; Otherwise, we will increase the number of instructions executed.
; (This is a heuristic of course, because the new sext could have been
; merged with something else.)
; CHECK-LABEL: @twoArgsNoPromotion
; CHECK: add nsw i32 %arg1, %arg2
; CHECK: ret
define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, i8* %base) {
  %add = add nsw i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote when the related instruction does not have
; the nsw flag.
; CHECK-LABEL: @noPromotion
; CHECK-NOT: add i64
; CHECK: ret
define i8 @noPromotion(i32 %arg1, i32 %arg2, i8* %base) {
  %add = add i32 %arg1, %arg2
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we correctly promote constant arguments.
; CHECK-LABEL: @oneArgPromotion
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotion(i32 %arg1, i8* %base) {
  %add = add nsw i32 %arg1, 1
  %sextadd = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote truncate when we cannot determine the
; bits that are dropped.
; CHECK-LABEL: @oneArgPromotionBlockTrunc1
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 %arg1 to i8
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc1(i32 %arg1, i8* %base) {
  %trunc = trunc i32 %arg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we do not promote truncate when we cannot determine all the
; bits that are dropped.
; CHECK-LABEL: @oneArgPromotionBlockTrunc2
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i16 %arg1 to i32
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[ARG1SEXT]] to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc2(i16 %arg1, i8* %base) {
  %sextarg1 = sext i16 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Check that we are able to promote truncate when we know all the bits
; that are dropped.
; CHECK-LABEL: @oneArgPromotionPassTruncKeepSExt
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionPassTruncKeepSExt(i1 %arg1, i8* %base) {
  %sextarg1 = sext i1 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; On X86 truncates are free. Check that we are able to promote the add
; to be used as addressing mode and that we insert a truncate for the other
; use.
; CHECK-LABEL: @oneArgPromotionTruncInsert
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: add i8 [[LOAD]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsert(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %finalres = add i8 %res, %add
  ret i8 %finalres
}

; Cannot sext from a larger type than the promoted type.
; CHECK-LABEL: @oneArgPromotionLargerType
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i128 %arg1 to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionLargerType(i128 %arg1, i8* %base) {
  %trunc = trunc i128 %arg1 to i8
  %add = add nsw i8 %trunc, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %finalres = add i8 %res, %add
  ret i8 %finalres
}

; Use same inserted trunc
; On X86 truncates are free. Check that we are able to promote the add
; to be used as addressing mode and that we insert a truncate for
; *all* the other uses.
; CHECK-LABEL: @oneArgPromotionTruncInsertSeveralUse
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = add i8 [[LOAD]], [[TRUNC]]
; CHECK: add i8 [[ADDRES]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsertSeveralUse(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %almostfinalres = add i8 %res, %add
  %finalres = add i8 %almostfinalres, %add
  ret i8 %finalres
}

; Check that the promoted instruction is used for all uses of the original
; sign extension.
; CHECK-LABEL: @oneArgPromotionSExtSeveralUse
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = zext i8 [[LOAD]] to i64
; CHECK: add i64 [[ADDRES]], [[PROMOTED]]
; CHECK: ret
define i64 @oneArgPromotionSExtSeveralUse(i8 %arg1, i8* %base) {
  %add = add nsw i8 %arg1, 1
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  %almostfinalres = zext i8 %res to i64
  %finalres = add i64 %almostfinalres, %sextadd
  ret i64 %finalres
}

; Check all types of rollback mechanism.
; For this test, the sign extension stays in place.
; However, the matching process goes as far as promoting both operands
; of the first promotable add.
; At this point the rollback mechanism kicks in and restores the states
; until the addressing mode matcher is able to match something: in that
; case promote nothing.
; Along the way, the promotion mechanism involves:
; - Mutating the type of %promotableadd1 and %promotableadd2.
; - Creating a sext for %arg1 and %arg2.
; - Creating a trunc for a use of %promotableadd1.
; - Replacing a bunch of uses.
; - Setting the operands of the promoted instruction with the promoted values.
; - Moving instructions around (mainly sext when promoting an instruction).
; Each type of those promotions has to be undone at least once during this
; specific test.
; CHECK-LABEL: @twoArgsPromotionNest
; CHECK: [[ORIG:%[a-zA-Z_0-9-]+]] = add nsw i32 %arg1, %arg2
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ORIG]], [[ORIG]]
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsPromotionNest(i32 %arg1, i32 %arg2, i8* %base) {
  %promotableadd1 = add nsw i32 %arg1, %arg2
  %promotableadd2 = add nsw i32 %promotableadd1, %promotableadd1
  %sextadd = sext i32 %promotableadd2 to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Test the InstructionRemover undo, which was the only one not
; exercised in the previous test.
; The matcher first promotes the add, removes the trunc and promotes
; the sext of arg1.
; Then, the matcher cannot use an addressing mode r + r + r, thus it
; rolls back.
; CHECK-LABEL: @twoArgsNoPromotionRemove
; CHECK: [[SEXTARG1:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i32
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[SEXTARG1]] to i8
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[TRUNC]], %arg2
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i64
; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
  %sextarg1 = sext i1 %arg1 to i32
  %trunc = trunc i32 %sextarg1 to i8
  %add = add nsw i8 %trunc, %arg2
  %sextadd = sext i8 %add to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
  %res = load i8* %arrayidx
  ret i8 %res
}

; Ensure that when the profitability check kicks in, the IR is not modified
; while IgnoreProfitability is on.
; The profitability check happens when a candidate instruction has several uses.
; The matcher will create a new matcher for each use and check if the
; instruction is in the list of the matched instructions of this new matcher.
; All changes made by the new matchers must be dropped before pursuing
; otherwise the state of the original matcher will be wrong.
;
; Without the profitability check, when checking for the second use of
; arrayidx, the matcher promotes everything all the way to %arg1, %arg2.
; Check that we did not promote anything in the final matching.
;
; <rdar://problem/16020230>
; CHECK-LABEL: @checkProfitability
; CHECK-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
; CHECK-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
; CHECK: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
; CHECK: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; BB then
; CHECK: [[BASE1:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE1]] to i32*
; CHECK: load i32* [[ADDR1]]
; BB else
; CHECK: [[BASE2:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
; CHECK: load i32* [[ADDR2]]
; CHECK: ret
; CHECK-GEP-LABEL: @checkProfitability
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
; BB then
; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
; CHECK-GEP: load i32* [[ADDR1]]
; BB else
; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
; CHECK-GEP: load i32* [[ADDR2]]
; CHECK-GEP: ret
define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
  %shl = shl nsw i32 %arg1, 1
  %add1 = add nsw i32 %shl, %arg2
  %sextidx1 = sext i32 %add1 to i64
  %tmpptr = inttoptr i64 %sextidx1 to i32*
  %arrayidx1 = getelementptr i32* %tmpptr, i64 12
  br i1 %test, label %then, label %else
then:
  %res1 = load i32* %arrayidx1
  br label %end
else:
  %res2 = load i32* %arrayidx1
  br label %end
end:
  %tmp = phi i32 [%res1, %then], [%res2, %else]
  %res = add i32 %tmp, %add1
  %addr = inttoptr i32 %res to i32*
  %final = load i32* %addr
  ret i32 %final
}