; This tests the NaCl intrinsics not related to atomic operations.
;
; Each test function below is followed by the FileCheck patterns that the
; disassembly of that function is expected to satisfy, one group per check
; prefix (one prefix per RUN configuration or target).

; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 --command FileCheck %s
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 --command FileCheck %s

; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1
; share the same "CHECK" prefix). This separate run helps check that
; some code is optimized out.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 --sandbox -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 \
; RUN:   --command FileCheck --check-prefix=CHECKO2REM %s

; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets
; lowered to __nacl_read_tp instead of gs:0x0.
; We also know that because it's O2, it'll have the O2REM optimizations.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN:   --target x8632 -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_X8632 \
; RUN:   --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s

; RUN: %if --need=target_ARM32 \
; RUN:   --command %p2i --filetype=obj --disassemble --target arm32 \
; RUN:   -i %s --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_ARM32 \
; RUN:   --command FileCheck --check-prefix ARM32 %s

; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
; RUN:   -i %s --args -Om1 --skip-unimplemented \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command FileCheck --check-prefix MIPS32 %s

declare i8* @llvm.nacl.read.tp()
declare void @llvm.nacl.longjmp(i8*, i32)
declare i32 @llvm.nacl.setjmp(i8*)
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare void @llvm.trap()
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

define internal i32 @test_nacl_read_tp() {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  %__1 = ptrtoint i8* %ptr to i32
  ret i32 %__1
}
; CHECK-LABEL: test_nacl_read_tp
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM-LABEL: test_nacl_read_tp
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp
; MIPS32: jal {{.*}} __nacl_read_tp

define internal i32 @test_nacl_read_tp_more_addressing() {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  %__1 = ptrtoint i8* %ptr to i32
  %x = add i32 %__1, %__1
  %__3 = inttoptr i32 %x to i32*
  %v = load i32, i32* %__3, align 1
  %v_add = add i32 %v, 1

  %ptr2 = call i8* @llvm.nacl.read.tp()
  %__6 = ptrtoint i8* %ptr2 to i32
  %y = add i32 %__6, 4
  %__8 = inttoptr i32 %y to i32*
  %v_add2 = add i32 %v, 4
  store i32 %v_add2, i32* %__8, align 1
  ret i32 %v
}
; CHECK-LABEL: test_nacl_read_tp_more_addressing
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp_more_addressing
; MIPS32: jal {{.*}} __nacl_read_tp

define internal i32 @test_nacl_read_tp_dead(i32 %a) {
entry:
  %ptr = call i8* @llvm.nacl.read.tp()
  ; Not actually using the result of nacl read tp call.
  ; In O2 mode this should be DCE'ed.
  ret i32 %a
}
; Consider nacl.read.tp side-effect free, so it can be eliminated.
; The negative pattern below deliberately uses the same operand spelling as
; the positive gs:0x0 patterns above (no space after the comma, optional
; DWORD PTR); a pattern spelled differently could never match and the
; negative check would pass vacuously.
; CHECKO2REM-LABEL: test_nacl_read_tp_dead
; CHECKO2REM-NOT: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead
; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp
; MIPS32-LABEL: test_nacl_read_tp_dead
; MIPS32: jal {{.*}} __nacl_read_tp

define internal i32 @test_setjmplongjmp(i32 %iptr_env) {
entry:
  %env = inttoptr i32 %iptr_env to i8*
  %i = call i32 @llvm.nacl.setjmp(i8* %env)
  %r1 = icmp eq i32 %i, 0
  br i1 %r1, label %Zero, label %NonZero
Zero:
  ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy.
  %env2 = inttoptr i32 %iptr_env to i8*
  call void @llvm.nacl.longjmp(i8* %env2, i32 1)
  ret i32 0
NonZero:
  ret i32 1
}
; CHECK-LABEL: test_setjmplongjmp
; CHECK: call {{.*}} R_{{.*}} setjmp
; CHECK: call {{.*}} R_{{.*}} longjmp
; CHECKO2REM-LABEL: test_setjmplongjmp
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
; ARM32-LABEL: test_setjmplongjmp
; ARM32: bl {{.*}} setjmp
; ARM32: bl {{.*}} longjmp
; MIPS32-LABEL: test_setjmplongjmp
; MIPS32: jal {{.*}} setjmp
; MIPS32: jal {{.*}} longjmp

define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
entry:
  %env = inttoptr i32 %iptr_env to i8*
  %i = call i32 @llvm.nacl.setjmp(i8* %env)
  ret i32 %i_other
}
; Don't consider setjmp side-effect free, so it's not eliminated if
; result unused.
; CHECKO2REM-LABEL: test_setjmp_unused
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; MIPS32-LABEL: test_setjmp_unused
; MIPS32: jal {{.*}} setjmp

define internal float @test_sqrt_float(float %x, i32 %iptr) {
entry:
  %r = call float @llvm.sqrt.f32(float %x)
  %r2 = call float @llvm.sqrt.f32(float %r)
  %r3 = call float @llvm.sqrt.f32(float -0.0)
  %r4 = fadd float %r2, %r3
  ret float %r4
}
; CHECK-LABEL: test_sqrt_float
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}}
; CHECK: sqrtss xmm{{.*}},DWORD PTR
; ARM32-LABEL: test_sqrt_float
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vsqrt.f32
; ARM32: vadd.f32
; MIPS32-LABEL: test_sqrt_float
; MIPS32: sqrt.s
; MIPS32: sqrt.s
; MIPS32: sqrt.s
; MIPS32: add.s

define internal float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) {
entry:
  %__2 = inttoptr i32 %iptr to float*
  %y = load float, float* %__2, align 4
  %r5 = call float @llvm.sqrt.f32(float %y)
  %r6 = fadd float %x, %r5
  ret float %r6
}
; CHECK-LABEL: test_sqrt_float_mergeable_load
; We could fold the load and the sqrt into one operation, but the
; current folding only handles load + arithmetic op. The sqrt inst
; is considered an intrinsic call and not an arithmetic op.
; CHECK: sqrtss xmm{{.*}}
; ARM32-LABEL: test_sqrt_float_mergeable_load
; ARM32: vldr s{{.*}}
; ARM32: vsqrt.f32

define internal double @test_sqrt_double(double %x, i32 %iptr) {
entry:
  %r = call double @llvm.sqrt.f64(double %x)
  %r2 = call double @llvm.sqrt.f64(double %r)
  %r3 = call double @llvm.sqrt.f64(double -0.0)
  %r4 = fadd double %r2, %r3
  ret double %r4
}
; CHECK-LABEL: test_sqrt_double
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}}
; CHECK: sqrtsd xmm{{.*}},QWORD PTR
; ARM32-LABEL: test_sqrt_double
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vsqrt.f64
; ARM32: vadd.f64
; MIPS32-LABEL: test_sqrt_double
; MIPS32: sqrt.d
; MIPS32: sqrt.d
; MIPS32: sqrt.d
; MIPS32: add.d

define internal double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) {
entry:
  %__2 = inttoptr i32 %iptr to double*
  %y = load double, double* %__2, align 8
  %r5 = call double @llvm.sqrt.f64(double %y)
  %r6 = fadd double %x, %r5
  ret double %r6
}
; CHECK-LABEL: test_sqrt_double_mergeable_load
; CHECK: sqrtsd xmm{{.*}}
; ARM32-LABEL: test_sqrt_double_mergeable_load
; ARM32: vldr d{{.*}}
; ARM32: vsqrt.f64

define internal float @test_sqrt_ignored(float %x, double %y) {
entry:
  %ignored1 = call float @llvm.sqrt.f32(float %x)
  %ignored2 = call double @llvm.sqrt.f64(double %y)
  ret float 0.0
}
; CHECKO2REM-LABEL: test_sqrt_ignored
; CHECKO2REM-NOT: sqrtss
; CHECKO2REM-NOT: sqrtsd
; MIPS32-LABEL: test_sqrt_ignored
; MIPS32: sqrt.s
; MIPS32: sqrt.d

define internal float @test_fabs_float(float %x) {
entry:
  %r = call float @llvm.fabs.f32(float %x)
  %r2 = call float @llvm.fabs.f32(float %r)
  %r3 = call float @llvm.fabs.f32(float -0.0)
  %r4 = fadd float %r2, %r3
  ret float %r4
}
;;; Specially check that the pand instruction doesn't try to operate on a 32-bit
;;; (f32) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_float
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand {{.*}}xmm{{.*}}xmm
; MIPS32-LABEL: test_fabs_float
; MIPS32: abs.s
; MIPS32: abs.s
; MIPS32: abs.s
; MIPS32: add.s

define internal double @test_fabs_double(double %x) {
entry:
  %r = call double @llvm.fabs.f64(double %x)
  %r2 = call double @llvm.fabs.f64(double %r)
  %r3 = call double @llvm.fabs.f64(double -0.0)
  %r4 = fadd double %r2, %r3
  ret double %r4
}
;;; Specially check that the pand instruction doesn't try to operate on a 64-bit
;;; (f64) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_double
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
; CHECK: pand {{.*}}xmm{{.*}}xmm
; MIPS32-LABEL: test_fabs_double
; MIPS32: abs.d
; MIPS32: abs.d
; MIPS32: abs.d
; MIPS32: add.d

define internal <4 x float> @test_fabs_v4f32(<4 x float> %x) {
entry:
  %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r)
  %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
  %r4 = fadd <4 x float> %r2, %r3
  ret <4 x float> %r4
}
; CHECK-LABEL: test_fabs_v4f32
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: psrld
; CHECK: pand

define internal i32 @test_trap(i32 %br) {
entry:
  %r1 = icmp eq i32 %br, 0
  br i1 %r1, label %Zero, label %NonZero
Zero:
  call void @llvm.trap()
  unreachable
NonZero:
  ret i32 1
}
; CHECK-LABEL: test_trap
; CHECK: ud2
; ARM32-LABEL: test_trap
; ARM32: udf
; MIPS32-LABEL: test_trap
; MIPS32: teq zero,zero

define internal i32 @test_bswap_16(i32 %x) {
entry:
  %x_trunc = trunc i32 %x to i16
  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
  %r_zext = zext i16 %r to i32
  ret i32 %r_zext
}
; CHECK-LABEL: test_bswap_16
; Make sure this is the right operand size so that the most significant bit
; to least significant bit rotation happens at the right boundary.
; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8
; ARM32-LABEL: test_bswap_16
; ARM32: rev
; ARM32: lsr {{.*}} #16
; MIPS32-LABEL: test_bswap_16
; MIPS32: sll {{.*}},0x8
; MIPS32: lui {{.*}},0xff
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x10
; MIPS32: andi {{.*}},0xffff

define internal i32 @test_bswap_32(i32 %x) {
entry:
  %r = call i32 @llvm.bswap.i32(i32 %x)
  ret i32 %r
}
; CHECK-LABEL: test_bswap_32
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_32
; ARM32: rev
; MIPS32-LABEL: test_bswap_32
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: lui {{.*}},0xff
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

define internal i64 @test_bswap_64(i64 %x) {
entry:
  %r = call i64 @llvm.bswap.i64(i64 %x)
  ret i64 %r
}
; CHECK-LABEL: test_bswap_64
; CHECK: bswap e{{.*}}
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_64
; ARM32: rev
; ARM32: rev
; MIPS32-LABEL: test_bswap_64
; MIPS32: sll {{.*}},0x8
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: lui {{.*}},0xff
; MIPS32: or
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

define internal i64 @test_bswap_64_undef() {
entry:
  %r = call i64 @llvm.bswap.i64(i64 undef)
  ret i64 %r
}
; The ARM32 label names this function in full, so it cannot be satisfied by
; a reference to the shorter test_bswap_64 symbol.
; CHECK-LABEL: test_bswap_64_undef
; CHECK: bswap e{{.*}}
; CHECK: bswap e{{.*}}
; ARM32-LABEL: test_bswap_64_undef
; ARM32: rev
; ARM32: rev
; MIPS32-LABEL: test_bswap_64_undef
; MIPS32: sll {{.*}},0x8
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: lui {{.*}},0xff
; MIPS32: or
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: srl {{.*}},0x18
; MIPS32: srl {{.*}},0x8
; MIPS32: andi {{.*}},0xff00
; MIPS32: or
; MIPS32: or
; MIPS32: sll {{.*}},0x8
; MIPS32: and
; MIPS32: sll {{.*}},0x18
; MIPS32: or
; MIPS32: or

define internal i32 @test_ctlz_32(i32 %x) {
entry:
  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %r
}
; CHECK-LABEL: test_ctlz_32
; TODO(jvoung): If we detect that LZCNT is supported, then use that
; and avoid the need to do the cmovne and xor stuff to guarantee that
; the result is well-defined w/ input == 0.
; CHECK: bsr [[REG_TMP:e.*]],{{.*}}
; CHECK: mov [[REG_RES:e.*]],0x3f
; CHECK: cmovne [[REG_RES]],[[REG_TMP]]
; CHECK: xor [[REG_RES]],0x1f
; ARM32-LABEL: test_ctlz_32
; ARM32: clz
; MIPS32-LABEL: test_ctlz_32
; MIPS32: clz

define internal i32 @test_ctlz_32_const() {
entry:
  %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false)
  ret i32 %r
}
; Could potentially constant fold this, but the front-end should have done that.
; The dest operand must be a register and the source operand must be a register
; or memory.
; CHECK-LABEL: test_ctlz_32_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_32_const
; ARM32: clz
; MIPS32-LABEL: test_ctlz_32_const
; MIPS32: clz

define internal i32 @test_ctlz_32_ignored(i32 %x) {
entry:
  %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 1
}
; CHECKO2REM-LABEL: test_ctlz_32_ignored
; CHECKO2REM-NOT: bsr

define internal i64 @test_ctlz_64(i64 %x) {
entry:
  %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  ret i64 %r
}
; CHECKO2REM-LABEL: test_ctlz_64
; CHECK-LABEL: test_ctlz_64
; CHECK: bsr [[REG_TMP1:e.*]],{{.*}}
; CHECK: mov [[REG_RES1:e.*]],0x3f
; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]]
; CHECK: xor [[REG_RES1]],0x1f
; CHECK: add [[REG_RES1]],0x20
; CHECK: bsr [[REG_RES2:e.*]],{{.*}}
; CHECK: xor [[REG_RES2]],0x1f
; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_ctlz_64
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_ctlz_64
; MIPS32: clz
; MIPS32: clz
; MIPS32: addiu
; MIPS32: movn
; MIPS32: addiu

define internal i32 @test_ctlz_64_const(i64 %x) {
entry:
  %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false)
  %r2 = trunc i64 %r to i32
  ret i32 %r2
}
; The ARM32 label names this function in full, so it cannot be satisfied by
; a reference to the shorter test_ctlz_64 symbol.
; CHECK-LABEL: test_ctlz_64_const
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; CHECK: bsr e{{.*}},{{.*}}e{{.*}}
; ARM32-LABEL: test_ctlz_64_const
; ARM32: clz
; ARM32: clzne
; MIPS32-LABEL: test_ctlz_64_const
; MIPS32: clz
; MIPS32: clz
; MIPS32: addiu
; MIPS32: movn
; MIPS32: addiu

define internal i32 @test_ctlz_64_ignored(i64 %x) {
entry:
  %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false)
  ret i32 2
}
; CHECKO2REM-LABEL: test_ctlz_64_ignored
; CHECKO2REM-NOT: bsr

define internal i32 @test_cttz_32(i32 %x) {
entry:
  %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  ret i32 %r
}
; CHECK-LABEL: test_cttz_32
; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
; CHECK: mov [[REG_IF_ZERO:e.*]],0x20
; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]]
; ARM32-LABEL: test_cttz_32
; ARM32: rbit
; ARM32: clz
; MIPS32-LABEL: test_cttz_32
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu

define internal i64 @test_cttz_64(i64 %x) {
entry:
  %r = call i64 @llvm.cttz.i64(i64 %x, i1 false)
  ret i64 %r
}
; CHECK-LABEL: test_cttz_64
; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}}
; CHECK: mov [[REG_RES1:e.*]],0x20
; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]]
; CHECK: add [[REG_RES1]],0x20
; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]]
; CHECK: test [[REG_LOWER]],[[REG_LOWER]]
; CHECK: cmove [[REG_RES2]],[[REG_RES1]]
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_cttz_64
; ARM32: rbit
; ARM32: rbit
; ARM32: clz
; ARM32: cmp {{.*}}, #0
; ARM32: add {{.*}}, #32
; ARM32: clzne
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_cttz_64
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu
; MIPS32: addiu
; MIPS32: nor
; MIPS32: and
; MIPS32: clz
; MIPS32: li
; MIPS32: subu

define internal i32 @test_popcount_32(i32 %x) {
entry:
  %r = call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %r
}
; CHECK-LABEL: test_popcount_32
; CHECK: call {{.*}} R_{{.*}} __popcountsi2
; ARM32-LABEL: test_popcount_32
; ARM32: bl {{.*}} __popcountsi2
; MIPS32-LABEL: test_popcount_32
; MIPS32: jal {{.*}} __popcountsi2

define internal i64 @test_popcount_64(i64 %x) {
entry:
  %r = call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %r
}
; CHECK-LABEL: test_popcount_64
; CHECK: call {{.*}} R_{{.*}} __popcountdi2
; __popcountdi2 only returns a 32-bit result, so clear the upper bits of
; the return value just in case.
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_popcount_64
; ARM32: bl {{.*}} __popcountdi2
; ARM32: mov {{.*}}, #0
; MIPS32-LABEL: test_popcount_64
; MIPS32: jal {{.*}} __popcountdi2

define internal i32 @test_popcount_64_ret_i32(i64 %x) {
entry:
  %r_i64 = call i64 @llvm.ctpop.i64(i64 %x)
  %r = trunc i64 %r_i64 to i32
  ret i32 %r
}
; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out.
; The negative pattern is spelled like the positive zeroing check in
; test_popcount_64 (no space after the comma, 0x0 immediate) so that it is
; able to match the instruction it is guarding against.
; CHECKO2REM-LABEL: test_popcount_64_ret_i32
; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2
; CHECKO2REM-NOT: mov {{.*}},0x0
; MIPS32-LABEL: test_popcount_64_ret_i32
; MIPS32: jal {{.*}} __popcountdi2
; MIPS32: sw v0,{{.*}}
; MIPS32: sw v1,{{.*}}
; MIPS32: lw v0,{{.*}}
; MIPS32: lw ra,{{.*}}

define internal void @test_stacksave_noalloca() {
entry:
  %sp = call i8* @llvm.stacksave()
  call void @llvm.stackrestore(i8* %sp)
  ret void
}
; The O2-only label below exists solely to end the region scanned by the
; preceding negative check at this function, rather than at end of input.
; CHECKO2REM-LABEL: test_stacksave_noalloca
; CHECK-LABEL: test_stacksave_noalloca
; CHECK: mov {{.*}},esp
; CHECK: mov esp,{{.*}}
; ARM32-LABEL: test_stacksave_noalloca
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
; MIPS32-LABEL: test_stacksave_noalloca
; MIPS32: sw sp,{{.*}}
; MIPS32: lw [[REG:.*]],0(sp)
; MIPS32: move sp,[[REG]]

declare i32 @foo(i32 %x)

define internal void @test_stacksave_multiple(i32 %x) {
entry:
  %x_4 = mul i32 %x, 4
  %sp1 = call i8* @llvm.stacksave()
  %tmp1 = alloca i8, i32 %x_4, align 4

  %sp2 = call i8* @llvm.stacksave()
  %tmp2 = alloca i8, i32 %x_4, align 4

  %y = call i32 @foo(i32 %x)

  %sp3 = call i8* @llvm.stacksave()
  %tmp3 = alloca i8, i32 %x_4, align 4

  %__9 = bitcast i8* %tmp1 to i32*
  store i32 %y, i32* %__9, align 1

  %__10 = bitcast i8* %tmp2 to i32*
  store i32 %x, i32* %__10, align 1

  %__11 = bitcast i8* %tmp3 to i32*
  store i32 %x, i32* %__11, align 1

  call void @llvm.stackrestore(i8* %sp1)
  ret void
}
; CHECK-LABEL: test_stacksave_multiple
; lea is used to copy from esp for the allocas.
; Otherwise, only one stacksave is live.
; CHECK: mov ebp,esp
; CHECK: mov {{.*}},esp
; CHECK: lea {{.*}},[esp+0x10]
; CHECK: lea {{.*}},[esp+0x10]
; CHECK: call
; CHECK: mov esp,{{.*}}
; CHECK: mov esp,ebp
; ARM32-LABEL: test_stacksave_multiple
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov {{.*}}, sp
; ARM32: mov sp, {{.*}}
; MIPS32-LABEL: test_stacksave_multiple
; MIPS32: sw sp,[[MEMLOC:.*]]
; MIPS32: sw sp,{{.*}}
; MIPS32: sw sp,{{.*}}
; MIPS32: lw [[REG:.*]],[[MEMLOC]]
; MIPS32: move sp,[[REG]]