// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)
	MOVL	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1	// crash
	RET

DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVQ	$0, gobuf_ret(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
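//
// A rough Go-level model of what the assembly below does (explanatory
// sketch only; mcall has no Go body):
//
//	func mcall(fn func(*g)) {
//		gp := getg()
//		gp.sched.pc = getcallerpc()	// caller's PC, from 0(SP)
//		gp.sched.sp = getcallersp()	// caller's SP, from fn+0(FP)
//		switch to gp.m.g0 and its stack
//		fn(gp)	// must not return; badmcall2 reports it if it does
//	}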
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	POPQ	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
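//
// In outline, the code below does the following (explanatory sketch only):
//
//	record f's caller in m->morebuf
//	record f's own PC/SP/ctxt in g->sched
//	switch to m->g0 and its stack
//	CALL runtime·newstack	// allocates a larger stack and restarts f; does not return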
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	ADDL	DX, BX
	MOVL	stkbar_savedLRVal(BX), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX
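
// For example, a reflectcall with argsize == 24 is above 16, so the first
// DISPATCH is skipped; it is not above 32, so it jumps to runtime·call32
// (defined via CALLFN below), which reserves a 32-byte frame and copies the
// 24 argument bytes into it before calling f.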
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
	JMP	runtime·cas(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicstore(SB)

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVL	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xadduintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xadd(SB)

TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
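
// Note (added): the atomic stores below are implemented with XCHG rather
// than a plain MOV so that the store also acts as a full memory barrier
// on x86.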
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVL	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	LOCK
	ORB	AX, 0(BX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	LOCK
	ANDB	AX, 0(BX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the caller's return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	MOVL	0, AX
	RET

TEXT runtime·memclr(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$7, BX
	SHRQ	$3, CX
	MOVQ	$0, AX
	CLD
	REP
	STOSQ
	MOVQ	BX, CX
	REP
	STOSB
	RET

TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVL	-8(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
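//
// The closure pointer arrives in DX and the size word sits at offset 4 in
// the closure, so the body below is roughly (explanatory sketch only):
//
//	func memhash_varlen(p unsafe.Pointer, h uintptr) uintptr {
//		size := size word loaded from 4(DX)
//		return memhash(p, h, size)
//	}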
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so we have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.

TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·memeq(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
same:
	MOVB	$1, v+16(FP)
	RET

// a in SI
// b in DI
// count in BX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to the bytes we want, then shift into position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET
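
// Both runtime·cmpstring and bytes·Compare below reduce to runtime·cmpbody,
// which performs the usual three-way comparison. Roughly, in Go terms
// (explanatory sketch only):
//
//	func cmpbody(a, b []byte) int {
//		for i := 0; i < len(a) && i < len(b); i++ {
//			switch {
//			case a[i] < b[i]:
//				return -1
//			case a[i] > b[i]:
//				return +1
//			}
//		}
//		switch {
//		case len(a) < len(b):
//			return -1
//		case len(a) > len(b):
//			return +1
//		}
//		return 0
//	}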
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//	SI = a
//	DI = b
//	BX = alen
//	DX = blen
// output:
//	AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8	// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
934 diff8: 935 BSWAPQ AX // reverse order of bytes 936 BSWAPQ CX 937 XORQ AX, CX 938 BSRQ CX, CX // index of highest bit difference 939 SHRQ CX, AX // move a's bit to bottom 940 ANDQ $1, AX // mask bit 941 LEAQ -1(AX*2), AX // 1/0 => +1/-1 942 RET 943 944 // 0-7 bytes in common 945 small: 946 LEAQ (R8*8), CX // bytes left -> bits left 947 NEGQ CX // - bits lift (== 64 - bits left mod 64) 948 JEQ allsame 949 950 // load bytes of a into high bytes of AX 951 CMPB SI, $0xf8 952 JA si_high 953 MOVQ (SI), SI 954 JMP si_finish 955 si_high: 956 ADDQ R8, SI 957 MOVQ -8(SI), SI 958 SHRQ CX, SI 959 si_finish: 960 SHLQ CX, SI 961 962 // load bytes of b in to high bytes of BX 963 CMPB DI, $0xf8 964 JA di_high 965 MOVQ (DI), DI 966 JMP di_finish 967 di_high: 968 ADDQ R8, DI 969 MOVQ -8(DI), DI 970 SHRQ CX, DI 971 di_finish: 972 SHLQ CX, DI 973 974 BSWAPQ SI // reverse order of bytes 975 BSWAPQ DI 976 XORQ SI, DI // find bit differences 977 JEQ allsame 978 BSRQ DI, CX // index of highest bit difference 979 SHRQ CX, SI // move a's bit to bottom 980 ANDQ $1, SI // mask bit 981 LEAQ -1(SI*2), AX // 1/0 => +1/-1 982 RET 983 984 allsame: 985 XORQ AX, AX 986 XORQ CX, CX 987 CMPQ BX, DX 988 SETGT AX // 1 if alen > blen 989 SETEQ CX // 1 if alen == blen 990 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result 991 RET 992 993 TEXT bytesIndexByte(SB),NOSPLIT,$0-20 994 MOVL s+0(FP), SI 995 MOVL s_len+4(FP), BX 996 MOVB c+12(FP), AL 997 CALL runtimeindexbytebody(SB) 998 MOVL AX, ret+16(FP) 999 RET 1000 1001 TEXT stringsIndexByte(SB),NOSPLIT,$0-20 1002 MOVL s+0(FP), SI 1003 MOVL s_len+4(FP), BX 1004 MOVB c+8(FP), AL 1005 CALL runtimeindexbytebody(SB) 1006 MOVL AX, ret+16(FP) 1007 RET 1008 1009 // input: 1010 // SI: data 1011 // BX: data len 1012 // AL: byte sought 1013 // output: 1014 // AX 1015 TEXT runtimeindexbytebody(SB),NOSPLIT,$0 1016 MOVL SI, DI 1017 1018 CMPL BX, $16 1019 JLT small 1020 1021 // round up to first 16-byte boundary 1022 TESTL $15, SI 1023 JZ aligned 1024 MOVL SI, CX 1025 ANDL $~15, CX 1026 ADDL $16, CX 1027 1028 // search the beginning 1029 SUBL SI, CX 1030 REPN; SCASB 1031 JZ success 1032 1033 // DI is 16-byte aligned; get ready to search using SSE instructions 1034 aligned: 1035 // round down to last 16-byte boundary 1036 MOVL BX, R11 1037 ADDL SI, R11 1038 ANDL $~15, R11 1039 1040 // shuffle X0 around so that each byte contains c 1041 MOVD AX, X0 1042 PUNPCKLBW X0, X0 1043 PUNPCKLBW X0, X0 1044 PSHUFL $0, X0, X0 1045 JMP condition 1046 1047 sse: 1048 // move the next 16-byte chunk of the buffer into X1 1049 MOVO (DI), X1 1050 // compare bytes in X0 to X1 1051 PCMPEQB X0, X1 1052 // take the top bit of each byte in X1 and put the result in DX 1053 PMOVMSKB X1, DX 1054 TESTL DX, DX 1055 JNZ ssesuccess 1056 ADDL $16, DI 1057 1058 condition: 1059 CMPL DI, R11 1060 JLT sse 1061 1062 // search the end 1063 MOVL SI, CX 1064 ADDL BX, CX 1065 SUBL R11, CX 1066 // if CX == 0, the zero flag will be set and we'll end up 1067 // returning a false success 1068 JZ failure 1069 REPN; SCASB 1070 JZ success 1071 1072 failure: 1073 MOVL $-1, AX 1074 RET 1075 1076 // handle for lengths < 16 1077 small: 1078 MOVL BX, CX 1079 REPN; SCASB 1080 JZ success 1081 MOVL $-1, AX 1082 RET 1083 1084 // we've found the chunk containing the byte 1085 // now just figure out which specific byte it is 1086 ssesuccess: 1087 // get the index of the least significant set bit 1088 BSFW DX, DX 1089 SUBL SI, DI 1090 ADDL DI, DX 1091 MOVL DX, AX 1092 RET 1093 1094 success: 1095 SUBL SI, DI 1096 SUBL $1, DI 1097 MOVL DI, AX 1098 RET 1099 1100 TEXT 
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET