// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// NOTE(review): symbol names such as runtimert0_go and runtimeg0 look like
// they lost a package separator when this file was extracted; confirm them
// against the upstream file before assembling.

// rt0_go is the runtime bootstrap routine. It saves argc/argv on an aligned
// stack, sets default stack bounds for g0, probes CPUID, sets up TLS (through
// _cgo_init when present, otherwise ldt0setup), links m0 and g0 together, and
// then calls check, args, osinit, schedinit, newproc(mainPC) and mstart.
TEXT runtimert0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtimeg0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtimelfenceBeforeRdtsc(SB)
notintel:

	MOVL	$1, AX
	CPUID
	MOVL	CX, runtimecpuid_ecx(SB)
	MOVL	DX, runtimecpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtimeg0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

	// skip runtimeldt0setup(SB) and tls test after _cgo_init for non-windows
	CMPL	runtimeiswindows(SB), $0
	JEQ	ok
needtls:
	// skip runtimeldt0setup(SB) and tls test on Plan 9 in all cases
	CMPL	runtimeisplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtimeldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtimetls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtimeg0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtimem0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(CX)

	CALL	runtimeemptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtimecheck(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtimeargs(SB)
	CALL	runtimeosinit(SB)
	CALL	runtimeschedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtimemainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtimenewproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtimemstart(SB)

	// mstart is not expected to return; trap if it does.
	INT $3
	RET

// mainPC holds the address of main; rt0_go passes it to newproc as the entry.
DATA	runtimemainPC+0(SB)/4,$runtimemain(SB)
GLOBL	runtimemainPC(SB),RODATA,$4

// breakpoint raises a debugger trap (INT $3).
TEXT runtimebreakpoint(SB),NOSPLIT,$0-0
	INT $3
	RET

TEXT runtimeasminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL $0x27F
	FLDCW	0(SP)
	POPL AX
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtimegosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtimegogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtimemcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtimebadmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we switched away from
	MOVL	DI, DX		// DX = closure pointer
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI
	POPL	AX
	// fn returned, which it must not do: report via badmcall2.
	MOVL	$runtimebadmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when already running on g0 or gsignal; when running on
// m->curg it saves state in g->sched, switches to the g0 stack, calls fn and
// switches back. Any other g is reported through badsystemstack.
TEXT runtimesystemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtimebadsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtimesystemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtimemstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtimemorestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtimenewstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt clears DX, the value morestack stores into g->sched.ctxt,
// and then jumps to morestack.
TEXT runtimemorestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP runtimemorestack(SB)

TEXT runtimestackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// NOTE(review): the next two TEXT blocks carry the same symbol name, and the
// first one jumps to that name. This looks like a package separator was lost
// during extraction; confirm the intended names against the upstream file.
TEXT reflectcall(SB), NOSPLIT, $0-0
	JMP	reflectcall(SB)

// Dispatches on argsize (in CX) to the smallest fixed-frame callNNN routine
// whose frame can hold the arguments; sizes above the largest frame go to
// badreflectcall.
TEXT reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtimecall16, 16)
	DISPATCH(runtimecall32, 32)
	DISPATCH(runtimecall64, 64)
	DISPATCH(runtimecall128, 128)
	DISPATCH(runtimecall256, 256)
	DISPATCH(runtimecall512, 512)
	DISPATCH(runtimecall1024, 1024)
	DISPATCH(runtimecall2048, 2048)
	DISPATCH(runtimecall4096, 4096)
	DISPATCH(runtimecall8192, 8192)
	DISPATCH(runtimecall16384, 16384)
	DISPATCH(runtimecall32768, 32768)
	DISPATCH(runtimecall65536, 65536)
	DISPATCH(runtimecall131072, 131072)
	DISPATCH(runtimecall262144, 262144)
	DISPATCH(runtimecall524288, 524288)
	DISPATCH(runtimecall1048576, 1048576)
	DISPATCH(runtimecall2097152, 2097152)
	DISPATCH(runtimecall4194304, 4194304)
	DISPATCH(runtimecall8388608, 8388608)
	DISPATCH(runtimecall16777216, 16777216)
	DISPATCH(runtimecall33554432, 33554432)
	DISPATCH(runtimecall67108864, 67108864)
	DISPATCH(runtimecall134217728, 134217728)
	DISPATCH(runtimecall268435456, 268435456)
	DISPATCH(runtimecall536870912, 536870912)
	DISPATCH(runtimecall1073741824, 1073741824)
	MOVL	$runtimebadreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine: it copies argsize bytes of
// arguments onto its frame, calls f, copies the bytes from retoffset onward
// back to the caller's buffer, and then calls callwritebarrier.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX; 			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtimecallwritebarrier(SB);	\
	RET

CALLFN(call16, 16)
CALLFN(call32, 32)
CALLFN(call64, 64)
CALLFN(call128, 128)
CALLFN(call256, 256)
CALLFN(call512, 512)
CALLFN(call1024, 1024)
CALLFN(call2048, 2048)
CALLFN(call4096, 4096)
CALLFN(call8192, 8192)
CALLFN(call16384, 16384)
CALLFN(call32768, 32768)
CALLFN(call65536, 65536)
CALLFN(call131072, 131072)
CALLFN(call262144, 262144)
CALLFN(call524288, 524288)
CALLFN(call1048576, 1048576)
CALLFN(call2097152, 2097152)
CALLFN(call4194304, 4194304)
CALLFN(call8388608, 8388608)
CALLFN(call16777216, 16777216)
CALLFN(call33554432, 33554432)
CALLFN(call67108864, 67108864)
CALLFN(call134217728, 134217728)
CALLFN(call268435456, 268435456)
CALLFN(call536870912, 536870912)
CALLFN(call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtimecas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// The uintptr/uint variants below tail-jump to the 32-bit implementations.
TEXT runtimecasuintptr(SB), NOSPLIT, $0-13
	JMP	runtimecas(SB)

TEXT runtimeatomicloaduintptr(SB), NOSPLIT, $0-8
	JMP	runtimeatomicload(SB)

TEXT runtimeatomicloaduint(SB), NOSPLIT, $0-8
	JMP	runtimeatomicload(SB)

TEXT runtimeatomicstoreuintptr(SB), NOSPLIT, $0-8
	JMP	runtimeatomicstore(SB)

// bool runtimecas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtimecas64(SB), NOSPLIT, $0-21
	MOVL	ptr+0(FP), BP
	MOVL	old_lo+4(FP), AX	// DX:AX = old (compared by CMPXCHG8B)
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX	// CX:BX = new (stored on match)
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)
	RET

// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtimecasp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+12(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtimexadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX		// XADDL left the old value in AX; add delta to get the new one
	MOVL	AX, ret+8(FP)
	RET

// xchg stores new into *ptr and returns the value that was there before.
TEXT runtimexchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

// xchgp1 has the same body as xchg.
TEXT runtimexchgp1(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtimexchguintptr(SB), NOSPLIT, $0-12
	JMP	runtimexchg(SB)

// procyield executes PAUSE `cycles` times.
// NOTE(review): the frame is declared $0-0 although cycles+0(FP) is read;
// confirm whether the argument size should be 4.
TEXT runtimeprocyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val into *ptr using XCHGL.
TEXT runtimeatomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore stores val into *ptr using XCHGL.
TEXT runtimeatomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

// uint64 atomicload64(uint64 volatile* addr);
TEXT runtimeatomicload64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX		// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	LEAL	ret_lo+4(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET

// void runtimeatomicstore64(uint64 volatile* addr, uint64 v);
TEXT runtimeatomicstore64(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX		// addr must be 8-byte aligned
	JZ	2(PC)
	MOVL	0, AX // crash with nil ptr deref
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET

// void	runtimeatomicor8(byte volatile*, byte);
TEXT runtimeatomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtimeatomicand8(byte volatile*, byte);
TEXT runtimeatomicand8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtimejmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	4(PC)
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
// NOTE(review): this loads runtimecgocallback_gofunc, while the definition
// below is named cgocallback_gofunc; the names look damaged by extraction,
// confirm against the upstream file.
TEXT runtimecgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtimecgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtimeneedm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtimecgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE 3(PC)
	MOVL	$runtimedropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtimesetg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtimestackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the return PC stored just below argp. If that PC is
// the stack-barrier PC, the result of nextBarrierPC is returned instead.
TEXT runtimegetcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtimestackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtimenextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp with pc. If
// that slot holds the stack-barrier PC, pc is passed to setNextBarrierPC
// and the slot is left alone.
TEXT runtimesetcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), CX
	CMPL	CX, runtimestackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtimesetNextBarrierPC(SB)
	RET

// getcallersp returns argp unchanged.
TEXT runtimegetcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
TEXT runtimecputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtimecpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtimelfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtimeldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at tls0
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtimetls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtimesetldt(SB)
	RET

// emptyfunc does nothing. It is not marked NOSPLIT; rt0_go calls it with the
// note "fault if stack check is wrong".
TEXT runtimeemptyfunc(SB),0,$0-0
	RET

// abort raises INT $3.
TEXT runtimeabort(SB),NOSPLIT,$0-0
	INT $0x3

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtimememhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtimememhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtimeaeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), CX	// size
	LEAL	ret+12(FP), DX
	JMP	runtimeaeshashbody(SB)

TEXT runtimeaeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), CX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtimeaeshashbody(SB)

// AX: data
// CX: length
// DX: address to put return value
TEXT runtimeaeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X6	// seed to low 64 bits of xmm6
	PINSRD	$2, CX, X6	// size to high 64 bits of xmm6
	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
	MOVO	runtimeaeskeysched(SB), X7
	CMPL	CX, $16
	JB	aes0to15
	JE	aes16
	CMPL	CX, $32
	JBE	aes17to32
	CMPL	CX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	CX, CX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X0
	ADDL	CX, CX
	PAND	masks<>(SB)(CX*8), X0

	// scramble 3 times
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

endofpage:
	// address ends in 1111xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X0
	ADDL	CX, CX
	PSHUFB	shifts<>(SB)(CX*8), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET

aes0:
	// return input seed
	MOVL	h+4(FP), AX
	MOVL	AX, (DX)
	RET

aes16:
	MOVOU	(AX), X0
	AESENC	X6, X0
	AESENC	X7, X0
	AESENC	X7, X0
	MOVL	X0, (DX)
	RET


aes17to32:
	// load data to be hashed
	MOVOU	(AX), X0
	MOVOU	-16(AX)(CX*1), X1

	// scramble 3 times
	AESENC	X6, X0
	AESENC	runtimeaeskeysched+16(SB), X1
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X0
	AESENC	X7, X1

	// combine results
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes33to64:
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	AESENC	X6, X0
	AESENC	runtimeaeskeysched+16(SB), X1
	AESENC	runtimeaeskeysched+32(SB), X2
	AESENC	runtimeaeskeysched+48(SB), X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

aes65plus:
	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(CX*1), X0
	MOVOU	-48(AX)(CX*1), X1
	MOVOU	-32(AX)(CX*1), X2
	MOVOU	-16(AX)(CX*1), X3

	// scramble state once
	AESENC	X6, X0
	AESENC	runtimeaeskeysched+16(SB), X1
	AESENC	runtimeaeskeysched+32(SB), X2
	AESENC	runtimeaeskeysched+48(SB), X3

	// compute number of remaining 64-byte blocks
	DECL	CX
	SHRL	$6, CX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	AESENC	X4, X0
	AESENC	X5, X1
	MOVOU	32(AX), X4
	MOVOU	48(AX), X5
	AESENC	X4, X2
	AESENC	X5, X3

	// scramble state
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	ADDL	$64, AX
	DECL	CX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3

	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVL	X0, (DX)
	RET

// aeshash32 hashes the 4 bytes at p together with seed h using three AESENC
// rounds keyed from aeskeysched.
TEXT runtimeaeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtimeaeskeysched+0(SB), X0
	AESENC	runtimeaeskeysched+16(SB), X0
	AESENC	runtimeaeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p together with seed h using three AESENC
// rounds keyed from aeskeysched.
TEXT runtimeaeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtimeaeskeysched+0(SB), X0
	AESENC	runtimeaeskeysched+16(SB), X0
	AESENC	runtimeaeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> is a table of sixteen 16-byte entries. Entry n (at offset n*16)
// has 0xff in its n lowest bytes and 0x00 in the remaining bytes.
DATA masks<>+0x00(SB)/4, $0x00000000	// n = 0
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff	// n = 1
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff	// n = 2
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff	// n = 3
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff	// n = 4
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff	// n = 5
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff	// n = 6
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff	// n = 7
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff	// n = 8
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff	// n = 9
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff	// n = 10
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff	// n = 11
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff	// n = 12
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff	// n = 13
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff	// n = 14
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff	// n = 15
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// shifts<> holds arguments to pshufb. Entry n (at offset n*16) moves the
// n highest bytes of the register down to its n lowest bytes; the index
// into the table is the number of bytes to move. Selector bytes of 0xff
// have their top bit set, which makes pshufb write zero to that position.
DATA shifts<>+0x00(SB)/4, $0x00000000	// n = 0
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f	// n = 1
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// n = 2
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// n = 3
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// n = 4
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// n = 5
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// n = 6
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// n = 7
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// n = 8
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807	// n = 9
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706	// n = 10
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605	// n = 11
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504	// n = 12
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403	// n = 13
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302	// n = 14
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201	// n = 15
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// runtimememeq compares size bytes at a and b.
// It only marshals its arguments into the registers runtimememeqbody
// expects (SI, DI, BX, and AX = address of the result byte) and jumps there.
TEXT runtimememeq(SB),NOSPLIT,$0-13
	LEAL	ret+12(FP), AX	// AX = where the answer is stored
	MOVL	size+8(FP), BX	// BX = byte count
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	JMP	runtimememeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers are reported equal without touching memory; otherwise
// the byte count is fetched through DX and the compare is delegated to
// runtimememeqbody.
TEXT runtimememequal_varlen(SB),NOSPLIT,$0-9
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, ret+8(FP)	// same address: trivially equal
	RET
compare:
	LEAL	ret+8(FP), AX
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	JMP	runtimememeqbody(SB)

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length, so only s1's length is read.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtimeeqstring(SB),NOSPLIT,$0-17
	MOVL	s2str+8(FP), DI
	MOVL	s1str+0(FP), SI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, v+16(FP)	// both strings share one data pointer
	RET
compare:
	LEAL	v+16(FP), AX
	MOVL	s1len+4(FP), BX
	JMP	runtimememeqbody(SB)

// bytesEqual reports whether two byte slices hold the same bytes.
// Slices of different length are unequal without looking at the data;
// otherwise the contents are compared by runtimememeqbody.
TEXT bytesEqual(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JEQ	samelen
	MOVB	$0, ret+24(FP)	// lengths differ
	RET
samelen:
	LEAL	ret+24(FP), AX
	MOVL	b+12(FP), DI
	MOVL	a+0(FP), SI
	JMP	runtimememeqbody(SB)

// runtimememeqbody is the shared equality kernel.
// input:
//   a in SI
//   b in DI
//   count in BX
//   address of result byte in AX
// The result byte is set to 1 when the count bytes match and 0 otherwise.
TEXT runtimememeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	cmptiny

	// 64 bytes at a time using xmm registers
cmp64:
	CMPL	BX, $64
	JB	cmp4
	TESTL	$0x4000000, runtimecpuid_edx(SB)	// check for sse2
	JE	cmp4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0	// fold the four per-byte results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX	// one bit per byte; all 16 set means no mismatch
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	cmp64
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
cmp4:
	CMPL	BX, $4
	JBE	cmptail
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	cmp4
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes: compare the 4 bytes that end at the end of
	// each buffer (they may overlap bytes already checked).
cmptail:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// fewer than 4 bytes in total
cmptiny:
	CMPL	BX, $0
	JEQ	cmpdone	// zero bytes: ZF is set, so the result below is 1

	LEAL	0(BX*8), CX
	NEGL	CX	// CX = -8*count, used as the shift amount below

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	a_edge

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	a_done
a_edge:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
a_done:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	b_edge
	MOVL	(DI), DI
	JMP	b_done
b_edge:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
b_done:

	SUBL	SI, DI
	SHLL	CX, DI	// shift out the bytes beyond count; ZF = wanted bytes equal
cmpdone:
	SETEQ	(AX)
	RET

// runtimecmpstring loads the base pointers and lengths of two strings and
// the address of the result word, then jumps to runtimecmpbody.
TEXT runtimecmpstring(SB),NOSPLIT,$0-20
	LEAL	ret+16(FP), AX
	MOVL	s2_len+12(FP), DX
	MOVL	s2_base+8(FP), DI
	MOVL	s1_len+4(FP), BX
	MOVL	s1_base+0(FP), SI
	JMP	runtimecmpbody(SB)

// bytesCompare is the byte-slice counterpart of runtimecmpstring: pointer
// and length of each slice go to SI/BX and DI/DX, AX points at the result.
TEXT bytesCompare(SB),NOSPLIT,$0-28
	LEAL	ret+24(FP), AX
	MOVL	s2+16(FP), DX
	MOVL	s2+12(FP), DI
	MOVL	s1+4(FP), BX
	MOVL	s1+0(FP), SI
	JMP	runtimecmpbody(SB)

// bytesIndexByte scans the s_len bytes at s for the byte c.
// REPN SCASB stops one byte past a match, so the index is DI-SI-1.
// The result is -1 when no byte matches.
TEXT bytesIndexByte(SB),NOSPLIT,$0-20
	MOVB	c+12(FP), AL
	MOVL	s_len+4(FP), CX
	MOVL	s+0(FP), SI
	MOVL	SI, DI
	CLD
	REPN
	SCASB
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// stringsIndexByte is the same scan as bytesIndexByte with the argument
// layout of a string followed by the byte to find.
TEXT stringsIndexByte(SB),NOSPLIT,$0-16
	MOVB	c+8(FP), AL
	MOVL	s_len+4(FP), CX
	MOVL	s+0(FP), SI
	MOVL	SI, DI
	CLD
	REPN
	SCASB
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// AX = address of return word (set to 1/0/-1)
//
// The first min(alen, blen) bytes are compared in memory order. At the first
// differing byte the result is +1 if a's byte is the larger (unsigned) and -1
// otherwise. If no byte differs, the result is decided by the lengths.
TEXT runtimecmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX	// DX = blen-alen
	CMOVLGT	BX, BP	// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	lengths	// same base pointer: only the lengths can differ
	CMPL	BP, $4
	JB	cmptiny
	TESTL	$0x4000000, runtimecpuid_edx(SB)	// check for sse2
	JE	cmp4

	// 16 bytes at a time using xmm registers
cmp16:
	CMPL	BP, $16
	JB	cmp4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX	// convert EQ to NE
	JNE	differ16	// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	cmp16

differ16:
	BSFL	BX, BX	// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// 4 bytes at a time using 32-bit registers
cmp4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	differ4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	cmp4

	// at most 4 bytes left: compare the 4 bytes ending at the end of
	// the common prefix.
tail4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	lengths

differ4:
	BSWAPL	BX	// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX	// find bit differences
	BSRL	CX, CX	// index of highest bit difference
	SHRL	CX, BX	// move a's bit to bottom
	ANDL	$1, BX	// mask bit
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
cmptiny:
	LEAL	(BP*8), CX
	NEGL	CX	// CX = -8*BP, the shift amount; zero means nothing to compare
	JEQ	lengths

	// load si
	CMPB	SI, $0xfc
	JA	a_edge
	MOVL	(SI), SI
	JMP	a_done
a_edge:
	// low address byte is above 0xfc: load the 4 bytes that end where
	// the data ends, then move the wanted bytes down.
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_done:
	SHLL	CX, SI	// wanted bytes to the top, zeros below

	// same for di
	CMPB	DI, $0xfc
	JA	b_edge
	MOVL	(DI), DI
	JMP	b_done
b_edge:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_done:
	SHLL	CX, DI

	BSWAPL	SI	// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI	// find bit differences
	JEQ	lengths
	BSRL	DI, CX	// index of highest bit difference
	SHRL	CX, SI	// move a's bit to bottom
	ANDL	$1, SI	// mask bit
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
lengths:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX	// DX still holds blen-alen
	SETLT	BX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// runtimefastrand1 advances the generator state stored in m_fastrand of the
// current g's m and returns the new state.
// Step: double the state, then XOR with 0x88888eef; if the XORed value is
// negative, the plain doubled value is used instead.
TEXT runtimefastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX	// AX = m of the current g
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX	// state *= 2
	MOVL	DX, BX	// keep the doubled value
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX	// XOR result negative: fall back to the doubled value
	MOVL	DX, ret+0(FP)
	MOVL	DX, m_fastrand(AX)
	RET

// runtimereturn0 puts 0 in AX and returns.
TEXT runtimereturn0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), AX	// AX = g->m
	MOVL	m_curg(AX), AX	// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtimegoexit(SB),NOSPLIT,$0-0
	// One NOP byte precedes the CALL and one follows it; there is
	// deliberately no RET because goexit1 does not return.
	BYTE	$0x90	// NOP
	CALL	runtimegoexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// runtimeprefetcht0 issues a PREFETCHT0 for the address passed in addr.
TEXT runtimeprefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// runtimeprefetcht1 issues a PREFETCHT1 for the address passed in addr.
TEXT runtimeprefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

// runtimeprefetcht2 issues a PREFETCHT2 for the address passed in addr.
TEXT runtimeprefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// runtimeprefetchnta issues a PREFETCHNTA for the address passed in addr.
TEXT runtimeprefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET