; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s

@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0

define void @addsub_i8rhs() minsize {
; CHECK-LABEL: addsub_i8rhs:
  %val8_tmp = load i8, i8* @var8
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

; Need this to prevent extension upon load and give a vanilla i8 operand.
  %val8 = add i8 %val8_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i8 %val8 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i8 %val8 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i8 %val8 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i8 %val8 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4

; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb

test2:
  %cmp_sext = sext i8 %val8 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

define void @sub_i8rhs() minsize {
; CHECK-LABEL: sub_i8rhs:
  %val8_tmp = load i8, i8* @var8
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

; Need this to prevent extension upon load and give a vanilla i8 operand.
  %val8 = add i8 %val8_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i8 %val8 to i32
  %res32_zext = sub i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i8 %val8 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i8 %val8 to i32
  %res32_sext = sub i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i8 %val8 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4

  ret void
}

define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
  %val16_tmp = load i16, i16* @var16
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

; Need this to prevent extension upon load and give a vanilla i16 operand.
  %val16 = add i16 %val16_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i16 %val16 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i16 %val16 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i16 %val16 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i16 %val16 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4

; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth

test2:
  %cmp_sext = sext i16 %val16 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

define void @sub_i16rhs() minsize {
; CHECK-LABEL: sub_i16rhs:
  %val16_tmp = load i16, i16* @var16
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

; Need this to prevent extension upon load and give a vanilla i16 operand.
  %val16 = add i16 %val16_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i16 %val16 to i32
  %res32_zext = sub i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i16 %val16 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i16 %val16 to i32
  %res32_sext = sub i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i16 %val16 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4

  ret void
}

; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
define void @addsub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: addsub_i32rhs:
  %val32_tmp = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  %val32 = add i32 %val32_tmp, 123

  %rhs64_zext = zext i32 %in32 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

  %rhs64_zext2 = zext i32 %val32 to i64
  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

  %rhs64_sext = sext i32 %val32 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

  %rhs64_sext_shift = shl i64 %rhs64_sext, 2
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

  ret void
}

define void @sub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: sub_i32rhs:
  %val32_tmp = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  %val32 = add i32 %val32_tmp, 123

  %rhs64_zext = zext i32 %in32 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

  %rhs64_zext2 = zext i32 %val32 to i64
  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

  %rhs64_sext = sext i32 %val32 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

  %rhs64_sext_shift = shl i64 %rhs64_sext, 2
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

  ret void
}

; Check that implicit zext from w reg write is used instead of uxtw form of add.
define i64 @add_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: add x0, x1, x[[TMP]]
  %ret = add i64 %y, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub and that mov WZR is folded to form a neg instruction.
define i64 @sub_fold_uxtw_xzr(i32 %x) {
; CHECK-LABEL: sub_fold_uxtw_xzr:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: neg x0, x[[TMP]]
  %ret = sub i64 0, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: cmp_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x1, x[[TMP]]
; CHECK-NEXT: cset
  %ret = icmp eq i64 %y, %ext
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add, leading to madd selection.
define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: madd_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
  %mul = mul i64 %y, %y
  %ret = add i64 %mul, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub, leading to sub/cmp folding.
define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
; CHECK-LABEL: cmp_sub_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
; CHECK-NEXT: cset
  %sub = sub i64 %z, %ext
  %ret = icmp eq i64 %sub, 0
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add and add of -1 gets selected as sub.
define i64 @add_imm_fold_uxtw(i32 %x) {
; CHECK-LABEL: add_imm_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: sub x0, x[[TMP]], #1
  %ret = add i64 %ext, -1
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add and add lsl form gets selected.
define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_lsl_fold_uxtw:
entry:
; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
  %m = or i32 %x, 3
  %ext = zext i32 %m to i64
  %shift = shl i64 %y, 3
; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
  %ret = add i64 %ext, %shift
  ret i64 %ret
}