// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap routine. It moves argc/argv onto a 16-byte
// aligned stack, fills in g0's stack bounds, records the CPUID feature
// bits, installs and verifies TLS, links g0 and m0 together, runs
// check/args/osinit/schedinit, queues runtime·main with newproc and
// finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)
	MOVL	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1	// crash
	RET

// mainPC holds the address of runtime·main; rt0_go hands it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint raises a breakpoint trap (INT $3).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX	// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX	// reload gobuf after the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	POPQ	AX
	// fn returned, which it must never do.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is gsignal or g0; when it is
// m->curg, saves state in g->sched, switches to g0's stack, calls fn
// and switches back.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	MOVL	0, AX

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	MOVL	0, AX

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	PUSHQ	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPQ	DX	// keep balance check happy
	RET

// morestack trampolines
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	ADDL	DX, BX
	MOVL	stkbar_savedLRVal(BX), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is at most MAXSIZE;
// otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call is the entry point used by package reflect; it simply
// tail-jumps to the runtime's reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and dispatches to the smallest
// call* routine whose fixed frame can hold it; sizes above 1<<30 go to
// badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine: it copies argsize bytes of
// arguments onto its own frame, calls f, then hands the result area
// (everything from retoffset on) to callRet to be copied back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers (DX, DI, SI, CX, stored in that order).
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield spins, executing PAUSE once per iteration while counting
// cycles down to zero.
// NOTE(review): the body reads cycles+0(FP) although the frame is
// declared $0-0; confirm the argument size against the Go declaration.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX	// load from address 0: deliberate fault
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
	MOVL	0, AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
	MOVL	0, AX
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	MOVL	0, AX
	RET

// memclrNoHeapPointers zeroes n bytes starting at ptr: n/4 words with
// STOSL followed by the remaining n%4 bytes with STOSB.
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$3, BX
	SHRQ	$2, CX
	MOVQ	$0, AX
	CLD
	REP
	STOSL
	MOVQ	BX, CX
	REP
	STOSB
	// Note: we zero only 4 bytes at a time so that the tail is at most
	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
	// See issue 13160.
	RET

// getcallerpc returns the return PC stored 8 bytes below argp. If that
// slot holds stackBarrierPC, the value produced by nextBarrierPC is
// returned instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+8(FP)
	RET

// setcallerpc overwrites the return PC stored 8 bytes below argp with pc.
// If that slot holds stackBarrierPC, pc is passed to setNextBarrierPC
// instead and the slot is left alone.
TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVL	-8(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// int64 runtime·cputicks(void)
// Returns the RDTSC counter, combining DX (high half) and AX (low half).
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.

TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	AX, ret+8(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq	// identical pointers: equal without looking at memory
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1_len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
same:
	MOVB	$1, ret+16(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result is left in AX: 1 if the two regions are equal, 0 otherwise.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET

// cmpstring loads both strings' base and length and stores the cmpbody
// result in ret.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare loads both slices' base and length and stores the cmpbody
// result in res.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8	// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX	// 1/0 => +1/-1
	RET

allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte loads the slice base, its length and the byte c, and
// stores the indexbytebody result in ret.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+12(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// strings·IndexByte loads the string base, its length and the byte c, and
// stores the indexbytebody result in ret.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), BX
	MOVB	c+8(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVL	AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first matching byte, or -1 if there is none
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL	SI, DI

	CMPL	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTL	$15, SI
	JZ	aligned
	MOVL	SI, CX
	ANDL	$~15, CX
	ADDL	$16, CX

	// search the beginning
	SUBL	SI, CX
	REPN; SCASB
	JZ	success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL	BX, R11
	ADDL	SI, R11
	ANDL	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDL	$16, DI

condition:
	CMPL	DI, R11
	JLT	sse

	// search the end
	MOVL	SI, CX
	ADDL	BX, CX
	SUBL	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVL	$-1, AX
	RET

// handle for lengths < 16
small:
	MOVL	BX, CX
	REPN; SCASB
	JZ	success
	MOVL	$-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBL	SI, DI
	ADDL	DI, DX
	MOVL	DX, AX
	RET

success:
	// SCASB leaves DI one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, AX
	RET

// bytes·Equal stores 0 in ret when the two lengths differ; otherwise it
// stores the memeqbody result.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// fastrand updates m.fastrand in place and returns the new value.
TEXT runtime·fastrand(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues PREFETCHT0 on addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues PREFETCHT1 on addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


// prefetcht2 issues PREFETCHT2 on addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues PREFETCHNTA on addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// checkASM stores 1 (true) in ret.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVB	$1, ret+0(FP)
	RET