; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC

; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not 2. More precisely, it makes sure that the
; immediate is not first copied uselessly into a register.

; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back in that variable is done as a single instruction.
; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register, doing
; an add and storing the result back.
; The binary operations supported are currently add, and, or, xor.
; sub is not supported because it is translated into an addition of the
; negated immediate.
;
; We also check the same patterns:
; - For inc/dec.
; - For register instead of immediate operands.
; - For floating point operations.

; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.

; Please note that these operations do not require the lock prefix: only
; sequentially consistent stores require this kind of protection on X86.
; And even for seq_cst operations, llvm uses the xchg instruction, which has
; an implicit lock prefix, so making it explicit is not required.
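; As a rough, hypothetical illustration (not part of the test input), the C++
; source pattern exercised by the read-modify-write tests below is approximately:
;
;   #include <atomic>
;
;   void add_imm(std::atomic<int> &x) {
;     // Expected to lower to a single memory-operand add on x86-64,
;     // with no separate load/store of x and no lock prefix.
;     x.store(2 + x.load(std::memory_order_acquire), std::memory_order_release);
;   }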
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8:
; X64: movb
; X64-NOT: movb
; X32-LABEL: store_atomic_imm_8:
; X32: movb
; X32-NOT: movb
  store atomic i8 42, i8* %p release, align 1
  ret void
}

define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16:
; X64: movw
; X64-NOT: movw
; X32-LABEL: store_atomic_imm_16:
; X32: movw
; X32-NOT: movw
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}

define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32:
; X64: movl
; X64-NOT: movl
; On 32-bit targets, there is an extra movl for each of these functions
; (probably for alignment reasons).
; X32-LABEL: store_atomic_imm_32:
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
  store atomic i32 42, i32* %p release, align 4
  ret void
}

define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64:
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32-bit architectures, and thus
; cannot be optimized in the same way as the others.
; X32-LABEL: store_atomic_imm_64:
; X32: cmpxchg8b
  store atomic i64 42, i64* %p release, align 8
  ret void
}

; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
; even on X64, one must use movabsq, which can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big:
; X64: movabsq
; X64: movq
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}

; It would be incorrect to replace a lock xchgl with a movl.
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64: xchgl
; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32: xchgl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}

; ----- ADD -----

define void @add_8i(i8* %p) {
; X64-LABEL: add_8i:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8i:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_8r(i8* %p, i8 %v) {
; X64-LABEL: add_8r:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8r:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16i:
; X64-NOT: addw
; X32-LABEL: add_16i:
; X32-NOT: addw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_16r(i16* %p, i16 %v) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16r:
; X64-NOT: addw
; X32-LABEL: add_16r:
; X32-NOT: addw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_32i(i32* %p) {
; X64-LABEL: add_32i:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32i:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @add_32r(i32* %p, i32 %v) {
; X64-LABEL: add_32r:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32r:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load is added to itself. The pattern
; matching should not fold this. We only test with 32-bit add, but the same
; applies to other sizes and operations.
define void @add_32r_self(i32* %p) {
; X64-LABEL: add_32r_self:
; X64-NOT: lock
; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
; X64: addl %[[R]], %[[R]]
; X64: movl %[[R]], (%[[M]])
; X32-LABEL: add_32r_self:
; X32-NOT: lock
; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
; X32: addl %[[R]], %[[R]]
; X32: movl %[[R]], (%[[M]])
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @add_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: add_32r_ret_load:
; X64-NOT: lock
; X64: movl (%rdi), %eax
; X64-NEXT: addl %eax, %esi
; X64-NEXT: movl %esi, (%rdi)
; X64-NEXT: retq
; X32-LABEL: add_32r_ret_load:
; X32-NOT: lock
; X32: movl 4(%esp), %[[P:[a-z]+]]
; X32-NEXT: movl (%[[P]]),
; X32-NOT: %[[P]]
; More code here, we just don't want it to load from P.
; X32: movl %{{.*}}, (%[[P]])
; X32-NEXT: retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret i32 %1
}

define void @add_64i(i64* %p) {
; X64-LABEL: add_64i:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_64r(i64* %p, i64 %v) {
; X64-LABEL: add_64r:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_32i_seq_cst(i32* %p) {
; X64-LABEL: add_32i_seq_cst:
; X64: xchgl
; X32-LABEL: add_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @add_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: add_32r_seq_cst:
; X64: xchgl
; X32-LABEL: add_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- AND -----

define void @and_8i(i8* %p) {
; X64-LABEL: and_8i:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8i:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_8r(i8* %p, i8 %v) {
; X64-LABEL: and_8r:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8r:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16i:
; X64-NOT: andw
; X32-LABEL: and_16i:
; X32-NOT: andw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_16r(i16* %p, i16 %v) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16r:
; X64-NOT: andw
; X32-LABEL: and_16r:
; X32-NOT: andw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_32i(i32* %p) {
; X64-LABEL: and_32i:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32i:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_32r(i32* %p, i32 %v) {
; X64-LABEL: and_32r:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32r:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_64i(i64* %p) {
; X64-LABEL: and_64i:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_64r(i64* %p, i64 %v) {
; X64-LABEL: and_64r:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_32i_seq_cst(i32* %p) {
; X64-LABEL: and_32i_seq_cst:
; X64: xchgl
; X32-LABEL: and_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @and_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: and_32r_seq_cst:
; X64: xchgl
; X32-LABEL: and_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- OR -----

define void @or_8i(i8* %p) {
; X64-LABEL: or_8i:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8i:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_8r(i8* %p, i8 %v) {
; X64-LABEL: or_8r:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8r:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_16i(i16* %p) {
; X64-LABEL: or_16i:
; X64-NOT: orw
; X32-LABEL: or_16i:
; X32-NOT: orw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_16r(i16* %p, i16 %v) {
; X64-LABEL: or_16r:
; X64-NOT: orw
; X32-LABEL: or_16r:
; X32-NOT: orw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_32i(i32* %p) {
; X64-LABEL: or_32i:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32i:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_32r(i32* %p, i32 %v) {
; X64-LABEL: or_32r:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32r:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_64i(i64* %p) {
; X64-LABEL: or_64i:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_64r(i64* %p, i64 %v) {
; X64-LABEL: or_64r:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_32i_seq_cst(i32* %p) {
; X64-LABEL: or_32i_seq_cst:
; X64: xchgl
; X32-LABEL: or_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @or_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: or_32r_seq_cst:
; X64: xchgl
; X32-LABEL: or_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- XOR -----

define void @xor_8i(i8* %p) {
; X64-LABEL: xor_8i:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8i:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_8r(i8* %p, i8 %v) {
; X64-LABEL: xor_8r:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8r:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_16i(i16* %p) {
; X64-LABEL: xor_16i:
; X64-NOT: xorw
; X32-LABEL: xor_16i:
; X32-NOT: xorw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_16r(i16* %p, i16 %v) {
; X64-LABEL: xor_16r:
; X64-NOT: xorw
; X32-LABEL: xor_16r:
; X32-NOT: xorw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_32i(i32* %p) {
; X64-LABEL: xor_32i:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32i:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_32r(i32* %p, i32 %v) {
; X64-LABEL: xor_32r:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32r:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_64i(i64* %p) {
; X64-LABEL: xor_64i:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_64r(i64* %p, i64 %v) {
; X64-LABEL: xor_64r:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_32i_seq_cst(i32* %p) {
; X64-LABEL: xor_32i_seq_cst:
; X64: xchgl
; X32-LABEL: xor_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @xor_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: xor_32r_seq_cst:
; X64: xchgl
; X32-LABEL: xor_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- INC -----

define void @inc_8(i8* %p) {
; X64-LABEL: inc_8:
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
; X32-LABEL: inc_8:
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
; SLOW_INC-LABEL: inc_8:
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: inc_16:
; X64-NOT: incw
; X32-LABEL: inc_16:
; X32-NOT: incw
; SLOW_INC-LABEL: inc_16:
; SLOW_INC-NOT: incw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @inc_32(i32* %p) {
; X64-LABEL: inc_32:
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
; X32-LABEL: inc_32:
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
; SLOW_INC-LABEL: inc_32:
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @inc_64(i64* %p) {
; X64-LABEL: inc_64:
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
; X32-LABEL: inc_64:
; SLOW_INC-LABEL: inc_64:
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @inc_32_seq_cst(i32* %p) {
; X64-LABEL: inc_32_seq_cst:
; X64: xchgl
; X32-LABEL: inc_32_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- DEC -----

define void @dec_8(i8* %p) {
; X64-LABEL: dec_8:
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
; X32-LABEL: dec_8:
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
; SLOW_INC-LABEL: dec_8:
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: dec_16:
; X64-NOT: decw
; X32-LABEL: dec_16:
; X32-NOT: decw
; SLOW_INC-LABEL: dec_16:
; SLOW_INC-NOT: decw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @dec_32(i32* %p) {
; X64-LABEL: dec_32:
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
; X32-LABEL: dec_32:
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
; SLOW_INC-LABEL: dec_32:
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @dec_64(i64* %p) {
; X64-LABEL: dec_64:
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
; X32-LABEL: dec_64:
; SLOW_INC-LABEL: dec_64:
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @dec_32_seq_cst(i32* %p) {
; X64-LABEL: dec_32_seq_cst:
; X64: xchgl
; X32-LABEL: dec_32_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- FADD -----
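; As a rough, hypothetical C++ sketch (not part of the test input), the float
; tests below model an atomic floating-point add expressed through an integer
; atomic plus bit casts, which is what the bitcast sequences in the IR reflect:
;
;   #include <atomic>
;   #include <cstdint>
;   #include <cstring>
;
;   void fadd_like(std::atomic<std::int32_t> &x, float v) {
;     std::int32_t bits = x.load(std::memory_order_seq_cst);
;     float f;
;     std::memcpy(&f, &bits, sizeof(f));    // bitcast i32 -> float
;     f += v;
;     std::memcpy(&bits, &f, sizeof(bits)); // bitcast float -> i32
;     x.store(bits, std::memory_order_release);
;   }
;
; On x86-64 this is expected to lower to a memory-operand addss/addsd followed
; by a single store, with no lock prefix.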
define void @fadd_32r(float* %loc, float %val) {
; X64-LABEL: fadd_32r:
; X64-NOT: lock
; X64-NOT: mov
; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
; X64-NEXT: movss %[[XMM]], (%[[M]])
; X32-LABEL: fadd_32r:
; Don't check x86-32.
; LLVM's SSE handling is conservative on x86-32 even without using atomics.
  %floc = bitcast float* %loc to i32*
  %1 = load atomic i32, i32* %floc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, i32* %floc release, align 4
  ret void
}

define void @fadd_64r(double* %loc, double %val) {
; X64-LABEL: fadd_64r:
; X64-NOT: lock
; X64-NOT: mov
; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
; X64-NEXT: movsd %[[XMM]], (%[[M]])
; X32-LABEL: fadd_64r:
; Don't check x86-32 (see comment above).
  %floc = bitcast double* %loc to i64*
  %1 = load atomic i64, i64* %floc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, i64* %floc release, align 8
  ret void
}

@glob32 = global float 0.000000e+00, align 4
@glob64 = global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
define void @fadd_32g() {
; X64-LABEL: fadd_32g:
; X64-NOT: lock
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss glob32(%rip), %[[XMM]]
; X64-NEXT: movss %[[XMM]], glob32(%rip)
; X32-LABEL: fadd_32g:
; Don't check x86-32 (see comment above).
  %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  ret void
}

define void @fadd_64g() {
; X64-LABEL: fadd_64g:
; X64-NOT: lock
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd glob64(%rip), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], glob64(%rip)
; X32-LABEL: fadd_64g:
; Don't check x86-32 (see comment above).
  %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded immediate location using an immediate.
define void @fadd_32imm() {
; X64-LABEL: fadd_32imm:
; X64-NOT: lock
; X64: movl $3735928559, %e[[M:[a-z]+]]
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss (%r[[M]]), %[[XMM]]
; X64-NEXT: movss %[[XMM]], (%r[[M]])
; X32-LABEL: fadd_32imm:
; Don't check x86-32 (see comment above).
  %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  ret void
}

define void @fadd_64imm() {
; X64-LABEL: fadd_64imm:
; X64-NOT: lock
; X64: movl $3735928559, %e[[M:[a-z]+]]
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd (%r[[M]]), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], (%r[[M]])
; X32-LABEL: fadd_64imm:
; Don't check x86-32 (see comment above).
  %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
define void @fadd_32stack() {
; X64-LABEL: fadd_32stack:
; X64-NOT: lock
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
; X64-NEXT: movss %[[XMM]], [[STACKOFF]](%rsp)
; X32-LABEL: fadd_32stack:
; Don't check x86-32 (see comment above).
  %ptr = alloca i32, align 4
  %bc3 = bitcast i32* %ptr to float*
  %load = load atomic i32, i32* %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, i32* %ptr release, align 4
  ret void
}

define void @fadd_64stack() {
; X64-LABEL: fadd_64stack:
; X64-NOT: lock
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], [[STACKOFF]](%rsp)
; X32-LABEL: fadd_64stack:
; Don't check x86-32 (see comment above).
  %ptr = alloca i64, align 8
  %bc3 = bitcast i64* %ptr to double*
  %load = load atomic i64, i64* %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, i64* %ptr release, align 8
  ret void
}