; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics,+sign-ext | FileCheck %s
; NOTE(review): the first RUN line may have lost trailing flags in extraction
; (upstream uses -disable-wasm-explicit-locals here too) — verify against upstream.

; Test that atomic loads are assembled properly.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

;===----------------------------------------------------------------------------
; Atomic loads: 32-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i32_no_offset:
; CHECK: i32.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @load_i32_no_offset(i32 *%p) {
  %v = load atomic i32, i32* %p seq_cst, align 4
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_gep_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_gep_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.
; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
  %s = getelementptr i32, i32* %p, i32 6
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; When loading from a fixed address, materialize a zero.
; CHECK-LABEL: load_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, 42($pop0){{$}}
define i32 @load_i32_from_numeric_address() {
  %s = inttoptr i32 42 to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  ret i32 %t
}

; CHECK-LABEL: load_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, gv($pop0){{$}}
@gv = global i32 0
define i32 @load_i32_from_global_address() {
  %t = load atomic i32, i32* @gv seq_cst, align 4
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Atomic loads: 64-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i64_no_offset:
; CHECK: i64.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @load_i64_no_offset(i64 *%p) {
  %v = load atomic i64, i64* %p seq_cst, align 8
  ret i64 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_gep_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_gep_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; We can't fold a negative offset though, even with an inbounds gep.
; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
  %s = getelementptr i64, i64* %p, i32 3
  %t = load atomic i64, i64* %s seq_cst, align 8
  ret i64 %t
}

;===----------------------------------------------------------------------------
; Atomic stores: 32-bit
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_i32_no_offset:
; CHECK-NEXT: .param i32, i32{{$}}
; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i32_no_offset(i32 *%p, i32 %v) {
  store atomic i32 %v, i32* %p seq_cst, align 4
  ret void
}

; With an nuw add, we can fold an offset.
; CHECK-LABEL: store_i32_with_folded_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_gep_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; Without inbounds, we can't fold a gep offset.
; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_offset(i32* %p) {
  %s = getelementptr i32, i32* %p, i32 6
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; When storing from a fixed address, materialize a zero.

; CHECK-LABEL: store_i32_to_numeric_address:
; CHECK-NEXT: i32.const $push0=, 0{{$}}
; CHECK-NEXT: i32.const $push1=, 0{{$}}
; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}}
define void @store_i32_to_numeric_address() {
  %s = inttoptr i32 42 to i32*
  store atomic i32 0, i32* %s seq_cst, align 4
  ret void
}

; CHECK-LABEL: store_i32_to_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.const $push1=, 0{{$}}
; CHECK: i32.atomic.store gv($pop0), $pop1{{$}}
define void @store_i32_to_global_address() {
  store atomic i32 0, i32* @gv seq_cst, align 4
  ret void
}

;===----------------------------------------------------------------------------
; Atomic stores: 64-bit
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_i64_no_offset:
; CHECK-NEXT: .param i32, i64{{$}}
; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i64_no_offset(i64 *%p, i64 %v) {
  store atomic i64 %v, i64* %p seq_cst, align 8
  ret void
}

; With an nuw add, we can fold an offset.
; CHECK-LABEL: store_i64_with_folded_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_gep_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_offset(i64* %p) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

; Without inbounds, we can't fold a gep offset.
; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_offset(i64* %p) {
  %s = getelementptr i64, i64* %p, i32 3
  store atomic i64 0, i64* %s seq_cst, align 8
  ret void
}

;===----------------------------------------------------------------------------
; Atomic sign-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending load.

; CHECK-LABEL: load_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_offset(i8* %p) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

; 32->64 sext load gets selected as i32.atomic.load, i64_extend_s/i32
; CHECK-LABEL: load_i32_i64_s_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
; CHECK-NEXT: i64.extend_s/i32 $push1=, $pop0{{$}}
define i64 @load_i32_i64_s_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  %u = sext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending load.
; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_gep_offset(i8* %p) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @load_i16_i32_s_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @load_i16_i64_s_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.
; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i32
  ret i32 %conv
}

; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.
; CHECK-LABEL: load_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @load_i16_i32_s_from_numeric_address() {
  %s = inttoptr i32 42 to i16*
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
@gv8 = global i8 0
define i32 @load_i8_i32_s_from_global_address() {
  %t = load atomic i8, i8* @gv8 seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic zero-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending load.

; CHECK-LABEL: load_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_offset(i8* %p) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}}
define i64 @load_i32_i64_z_with_folded_offset(i32* %p) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = load atomic i32, i32* %s seq_cst, align 4
  %u = zext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending load.
; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_gep_offset(i8* %p) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = load atomic i8, i8* %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
define i32 @load_i16_i32_z_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
define i64 @load_i16_i64_z_with_folded_gep_offset(i16* %p) {
  %s = getelementptr inbounds i16, i16* %p, i64 24
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.
; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i32
  ret i32 %conv
}

; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
define i32 @load_i16_i32_z_from_numeric_address() {
  %s = inttoptr i32 42 to i16*
  %t = load atomic i16, i16* %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
define i32 @load_i8_i32_z_from_global_address() {
  %t = load atomic i8, i8* @gv8 seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; i8 return value should test anyext loads

; CHECK-LABEL: load_i8_i32_retvalue:
; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @load_i8_i32_retvalue(i8 *%p) {
  %v = load atomic i8, i8* %p seq_cst, align 1
  ret i8 %v
}

;===----------------------------------------------------------------------------
; Atomic truncating stores
;===----------------------------------------------------------------------------

; Fold an offset into a truncating store.

; CHECK-LABEL: store_i8_i32_with_folded_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %s seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i32_i64_with_folded_offset:
; CHECK: i64.atomic.store32 24($0), $1{{$}}
define void @store_i32_i64_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  store atomic i32 %t, i32* %s seq_cst, align 4
  ret void
}

; Fold a gep offset into a truncating store.

; CHECK-LABEL: store_i8_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %s seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i16_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store16 48($0), $1{{$}}
define void @store_i16_i32_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  store atomic i16 %t, i16* %s seq_cst, align 2
  ret void
}

; CHECK-LABEL: store_i16_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store16 48($0), $1{{$}}
define void @store_i16_i64_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  store atomic i16 %t, i16* %s seq_cst, align 2
  ret void
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: store_i8_i32_with_folded_or_offset:
; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %p, i32 2
  %t = trunc i32 %v to i8
  store atomic i8 %t, i8* %arrayidx seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i8_i64_with_folded_or_offset:
; CHECK: i64.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %p, i32 2
  %t = trunc i64 %v to i8
  store atomic i8 %t, i8* %arrayidx seq_cst, align 1
  ret void
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 32-bit
;===----------------------------------------------------------------------------

; There are several RMW instructions, but here we only test 'add' as an example.

; Basic RMW.

; CHECK-LABEL: rmw_add_i32_no_offset:
; CHECK-NEXT: .param i32, i32{{$}}
; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @rmw_add_i32_no_offset(i32* %p, i32 %v) {
  %old = atomicrmw add i32* %p, i32 %v seq_cst
  ret i32 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i32_with_folded_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_offset(i32* %p, i32 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; With an inbounds gep, we can fold an offset.
; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_gep_offset(i32* %p, i32 %v) {
  %s = getelementptr inbounds i32, i32* %p, i32 6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %v) {
  %s = getelementptr inbounds i32, i32* %p, i32 -6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_offset(i32* %p, i32 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_offset(i32* %p, i32 %v) {
  %s = getelementptr i32, i32* %p, i32 6
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; When loading from a fixed address, materialize a zero.
; CHECK-LABEL: rmw_add_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i32_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i32*
  %old = atomicrmw add i32* %s, i32 %v seq_cst
  ret i32 %old
}

; CHECK-LABEL: rmw_add_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}}
define i32 @rmw_add_i32_from_global_address(i32 %v) {
  %old = atomicrmw add i32* @gv, i32 %v seq_cst
  ret i32 %old
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 64-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: rmw_add_i64_no_offset:
; CHECK-NEXT: .param i32, i64{{$}}
; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @rmw_add_i64_no_offset(i64* %p, i64 %v) {
  %old = atomicrmw add i64* %p, i64 %v seq_cst
  ret i64 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_offset(i64* %p, i64 %v) {
  %q = ptrtoint i64* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_gep_offset(i64* %p, i64 %v) {
  %s = getelementptr inbounds i64, i64* %p, i32 3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; We can't fold a negative offset though, even with an inbounds gep.
; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %v) {
  %s = getelementptr inbounds i64, i64* %p, i32 -3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_offset(i64* %p, i64 %v) {
  %q = ptrtoint i64* %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to i64*
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_offset(i64* %p, i64 %v) {
  %s = getelementptr i64, i64* %p, i32 3
  %old = atomicrmw add i64* %s, i64 %v seq_cst
  ret i64 %old
}

;===----------------------------------------------------------------------------
; Atomic truncating & sign-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending rmw.
; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64_extend_s/i32
; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset:
; CHECK: i32.wrap/i64 $push0=, $1
; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}}
; CHECK-NEXT: i64.extend_s/i32 $push2=, $pop1{{$}}
define i64 @rmw_add_i32_i64_s_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  %old = atomicrmw add i32* %s, i32 %t seq_cst
  %u = sext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending rmw.
; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16_u.add $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16_u.add $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; The 'and ..., -4' clears the low bits, so the +2 from the gep survives DAG
; combining as an 'or' whose set bits are provably zero in the base — it must
; still fold into the rmw's offset immediate as if it were an add.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; Fixed-address accesses materialize a zero base and put the address in the
; offset immediate (or use the global's symbol directly).
;
; Fix: both CHECK-LABEL directives below were missing the trailing ':' that
; every other label in this file carries; without it the label match is looser
; (it can match the symbol name appearing anywhere on a line, not just the
; emitted "name:" label line). Added for consistency and stricter matching.

; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16_u.add $push1=, 42($pop0), $0{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8_u.add $push1=, gv8($pop0), $0{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic truncating & zero-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending rmw.

; Zero-extending results come directly from the _u rmw instruction, so unlike
; the sign-extending tests above there is no separate extend instruction.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_offset(i8* %p, i32 %v) {
  %q = ptrtoint i8* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i8*
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.rmw32_u.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i32_i64_z_with_folded_offset(i32* %p, i64 %v) {
  %q = ptrtoint i32* %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to i32*
  %t = trunc i64 %v to i32
  %old = atomicrmw add i32* %s, i32 %t seq_cst
  %u = zext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8_u.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %v) {
  %s = getelementptr inbounds i8, i8* %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16_u.add $push0=, 48($0), $1{{$}}
define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16_u.add $push0=, 48($0), $1{{$}}
define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %v) {
  %s = getelementptr inbounds i16, i16* %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.rmw8_u.add $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to i8*
  %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add i8* %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; Fix: the first two CHECK-LABEL directives below were missing the trailing
; ':' used by every other label in this file; added for consistency and so the
; directive matches only the emitted "name:" label line.

; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16_u.add $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to i16*
  %t = trunc i32 %v to i16
  %old = atomicrmw add i16* %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8_u.add $push1=, gv8($pop0), $0{{$}}
define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* @gv8, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; i8 return value should test anyext RMWs

; CHECK-LABEL: rmw_add_i8_i32_retvalue:
; CHECK: i32.atomic.rmw8_u.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @rmw_add_i8_i32_retvalue(i8 *%p, i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add i8* %p, i8 %t seq_cst
  ret i8 %old
}