// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
//
// It copies argc/argv into its own 8-byte frame and tail-jumps to
// runtime·rt0_go, which reads them back from 0(SP) and 4(SP).
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)

// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
//
// BP, BX, SI and DI are pushed on entry and popped before returning.
// argc/argv are stashed in file-local globals so that _rt0_386_lib_go,
// running on another thread, can pick them up.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Scratch space for the two outgoing argument words used below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)
	MOVL	$0, 4(SP)

	CALL	AX

	// Undo the alignment adjustment.
	MOVL	BP, SP

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET

// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
// It reloads the argc/argv saved by _rt0_386_lib and jumps to rt0_go.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)
	JMP	runtime·rt0_go(SB)

DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4

// runtime·rt0_go is the common bootstrap reached (by JMP) from the
// entry points above, with argc at 0(SP) and argv at 4(SP).
// In order, it: realigns the stack and saves argc/argv, gives g0
// default stack bounds, probes CPUID and records feature flags,
// sets up TLS (through _cgo_init if present, otherwise ldt0setup),
// links m0 and g0, runs check/args/osinit/schedinit, queues
// runtime·main with newproc and finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX
	MOVL	4(SP), BX
	SUBL	$128, SP	// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)	// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d); exit(1)
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI	// SI = CPUID leaf 0 EAX, compared against 7 at eax7 below
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

	TESTL	$(1<<26), DX	// SSE2
	SETNE	runtime·support_sse2(SB)

	TESTL	$(1<<9), DI	// SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), DI	// SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), DI	// SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), DI	// POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), DI	// AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), DI	// OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), DI	// AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	osavx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX	// BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX	// BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX	// ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6	// Check for OS support of XMM and YMM registers.
	JE	nocpuinfo
#endif
noavx:
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)	// BP still holds &runtime·g0 from above
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// 0x3d-byte message written to fd 2 at bad_proc above.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// runtime·mainPC holds the address of runtime·main; rt0_go pushes
// its address as the entry argument for newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// runtime·breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP and PC and the current g in the Gobuf and
// zeroes gobuf_ret. Calls badctxt if gobuf_ctxt is not already zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs gobuf_g as the current g, loads SP, AX (ret) and DX (ctxt)
// from the Gobuf, clears those Gobuf fields, and jumps to gobuf_pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX	// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we switched away from
	MOVL	DI, DX	// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// fn returned
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the g0 stack. If the current g is already gsignal or g0,
// fn is tail-called in place. If it is m->curg, state is saved in
// g->sched, execution switches to g0, fn is called, and execution
// switches back to m->curg. Any other g is reported via badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX	// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size.
// For that purpose, it has no arguments.
//
// On entry 0(SP) is the PC in f (the function whose prologue called
// morestack), 4(SP) is f's caller's PC, and DX is stored as the
// context word in g->sched.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt zeroes DX (the context word morestack saves)
// and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is <= MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
// Clobbers AX.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is the entry point under the reflect package's name;
// it simply forwards to runtime's reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and dispatches to the smallest
// fixed-frame call* function whose frame can hold it. Sizes above
// 1073741824 end up in badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function NAME with a MAXSIZE-byte
// frame. It copies argsize bytes of arguments from argptr onto its own
// stack, calls the function held in f, and then hands the bytes from
// retoffset onward to callRet<> to be copied back into argptr.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*.
// This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// As used here: DX, DI, SI and CX are stored, in that order, as the
// four stack arguments to reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// One fixed-frame call function per size handled by reflectcall's
// DISPATCH table; the names must stay in step with that table.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// NOTE(review): the loop decrements before testing, so cycles == 0
// would not terminate promptly — presumably callers never pass 0; confirm.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// Preserves AX and BX (pushed on entry, popped before return).
// Calls badctxt if g->sched.ctxt is not zero.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX	// DX = SP on entry, used below to compute the stack depth

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)	// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX	// saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows the word at 0x14(FS) is also updated: zeroed when gg is
// nil (returning early), otherwise pointed at gg->m->tls.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Executes INT $3 when either bound is violated.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// func cputicks() int64
// Returns the RDTSC counter. When support_sse2 is set, RDTSC is
// preceded by LFENCE (if lfenceBeforeRdtsc is set) or MFENCE.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	runtime·support_sse2(SB), $1
	JNE	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT; rt0_go
// calls it with the note "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr hashes the string whose header p points at.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
//
// Reached by JMP from aeshash/aeshashstr, so h+4(FP) still refers to
// the seed argument in the original caller's frame. The result is the
// low 32 bits of the final XMM state, stored through DX.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0	// scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX	// BX = 2*len, so BX*8 below indexes 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1	// scramble input, xor in seed
	AESENC	X1, X1	// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 and last 32 bytes (they overlap when length < 64)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> is a 256-byte table of sixteen 16-byte entries.
// Entry i starts at byte offset i*16. Reading its four 32-bit words in
// address order (least significant first), the low i bytes are 0xff and
// the remaining bytes are zero; entry 0 is all zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// These are arguments to PSHUFB. They move data down from
// the high bytes of the register to the low bytes of the register.
// The index is how many bytes to move: entry i starts at byte offset i*16
// and selects source bytes 16-i..15 into destination bytes 0..i-1.
// The remaining selector bytes are 0xff. Entry 0 is all zero.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports whether the masks<> and shifts<> tables are both
// 16-byte aligned. It returns a single byte: 1 if aligned, 0 otherwise.
TEXT checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX		// a low bit set in either address survives the OR
	TESTL	$15, AX		// ZF=1 iff the low 4 bits of both addresses are zero
	SETEQ	ret+0(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Returns true immediately when both pointers are identical; otherwise
// hands off to memeqbody, which writes the result byte and returns to
// our caller.
TEXT runtimememequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq		// same address: equal without looking at memory
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX	// memeqbody stores the result through AX
	JMP	runtimememeqbody(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, except that the length is not an argument: it is
// read from offset 4 of the memory DX points to.
TEXT runtimememequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq		// same address: equal without looking at memory
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX	// memeqbody stores the result through AX
	JMP	runtimememeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// bytesEqual compares two byte slices: a at 0(FP) with length at 4(FP),
// b at 12(FP) with length at 16(FP). The one-byte result is at 24(FP).
// Slices of different length are unequal; otherwise the contents are
// compared by memeqbody.
TEXT bytesEqual(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret		// different lengths: not equal
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX	// memeqbody stores the result through AX
	JMP	runtimememeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody compares BX bytes at SI and DI and stores 1 (equal) or
// 0 (different) in the byte AX points to. It is entered by JMP from the
// wrappers above and returns directly to their caller.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
// Also overwrites CX and DX; the SSE2 path overwrites X0-X7.
TEXT runtimememeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
	// (only while at least 64 bytes remain and runtimesupport_sse2 is 1)
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	CMPB	runtimesupport_sse2(SB), $1
	JNE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0		// each byte becomes 0xff where a and b match
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0		// fold the four 16-byte results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX	// one bit per byte lane
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 bits set iff all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of each buffer. This may
	// re-check bytes already compared; the total length was at least 4,
	// so the load stays inside the buffers.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// fewer than 4 bytes in total
small:
	CMPL	BX, $0
	JEQ	equal		// ZF is set here, so SETEQ at equal stores 1

	LEAL	0(BX*8), CX
	NEGL	CX		// CX = -8*count; as a shift count this is 32-8*count

	MOVL	SI, DX
	CMPB	DX, $0xfc	// test the low byte of the address
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	SUBL	SI, DI
	SHLL	CX, DI		// discard the bytes beyond count; ZF=1 iff the wanted bytes match
equal:
	SETEQ	(AX)
	RET

// cmpstring compares two strings (pointer and length at 0/4(FP) and
// 8/12(FP)) and leaves the int result at 16(FP). The comparison is done
// by cmpbody, which returns directly to our caller.
TEXT runtimecmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX	// cmpbody stores the result through AX
	JMP	runtimecmpbody(SB)

// bytesCompare compares two byte slices (pointer and length at 0/4(FP)
// and 12/16(FP)) and leaves the int result at 24(FP). The comparison is
// done by cmpbody, which returns directly to our caller.
TEXT bytesCompare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX	// cmpbody stores the result through AX
	JMP	runtimecmpbody(SB)

// bytesIndexByte returns the index of the first byte equal to c in the
// byte slice s (pointer at 0(FP), length at 4(FP)), or -1 if there is none.
TEXT bytesIndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
	JZ	3(PC)		// found: skip the two-instruction "not found" return
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI		// DI is one past the match
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// stringsIndexByte returns the index of the first byte equal to c in the
// string s (pointer at 0(FP), length at 4(FP)), or -1 if there is none.
TEXT stringsIndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
	JZ	3(PC)		// found: skip the two-instruction "not found" return
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI		// DI is one past the match
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// cmpbody performs a three-way, byte-wise unsigned comparison of a and b.
// It is entered by JMP from the wrappers above and returns directly to
// their caller.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
// Also overwrites BP and CX; the SSE2 path overwrites X0 and X1.
TEXT runtimecmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX // DX = blen-alen
	JLE	2(PC)	// blen <= alen: BP already holds the minimum
	MOVL	BX, BP // BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame	// same address: only the lengths can differ
	CMPL	BP, $4
	JB	small
	CMPB	runtimesupport_sse2(SB), $1
	JNE	mediumloop
	// 16 bytes at a time using xmm registers
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, BX
	XORL	$0xffff, BX // convert EQ to NE
	JNE	diff16 // branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX // index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX	// 1 if a's byte is above b's (unsigned)
	LEAL	-1(DX*2), DX // convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// 4 bytes at a time using 32-bit registers
mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// 0-4 bytes left: compare the 4 bytes that end at the end of the
	// common prefix (may overlap bytes already compared).
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX // reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX // find bit differences
	BSRL	CX, CX // index of highest bit difference
	SHRL	CX, BX // move a's bit to bottom
	ANDL	$1, BX // mask bit
	LEAL	-1(BX*2), BX // 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX
	NEGL	CX	// CX = -8*BP; as a shift count this is 32-8*BP
	JEQ	allsame	// ZF from NEGL: nothing in common to compare

	// load si
	CMPB	SI, $0xfc	// test the low byte of the address
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI	// zero the bytes beyond the common length

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI // reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI // find bit differences
	JEQ	allsame
	BSRL	DI, CX // index of highest bit difference
	SHRL	CX, SI // move a's bit to bottom
	ANDL	$1, SI // mask bit
	LEAL	-1(SI*2), BX // 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
	// DX still holds blen-alen from the top of the function.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX
	SETLT	BX // 1 if alen > blen
	SETEQ	CX // 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX // 1,0,-1 result
	MOVL	BX, (AX)
	RET

// return0 sets AX to 0 and returns.
TEXT runtimereturn0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is returned in AX; CX is used as scratch for the TLS base.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP byte keeps that return address inside this function.
TEXT runtimegoexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtimegoexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
// addmoduledata appends the moduledata pointed to by AX to the list whose
// tail is runtimelastmoduledatap: the current tail's next field is set to
// AX, and AX then becomes the new tail. Takes its argument in AX rather
// than on the stack, and uses DX as scratch.
TEXT runtimeaddmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtimelastmoduledatap(SB), DX	// DX = current tail
	MOVL	AX, moduledata_next(DX)		// tail.next = new module
	MOVL	AX, runtimelastmoduledatap(SB)	// new module becomes the tail
	RET

// uint32tofloat64 converts the 32-bit argument at 0(FP) to a float64
// stored at 4(FP). The value is widened to 64 bits on the stack with a
// zero upper half, so the x87 integer load sees it as non-negative.
TEXT runtimeuint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)	// low 32 bits = a
	MOVL	$0, 4(SP)	// high 32 bits = 0
	FMOVV	0(SP), F0	// load the 8-byte integer onto the x87 stack
	FMOVDP	F0, ret+4(FP)	// store as float64 and pop
	RET

// float64touint32 converts the float64 argument at 0(FP) to a 32-bit
// result at 8(FP). The value is stored as an 8-byte integer and only its
// low 32 bits are returned. The x87 control word is saved at 0(SP),
// replaced for the conversion, and restored afterwards.
TEXT runtimefloat64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)				// save the caller's control word
	// NOTE(review): controlWord64trunc is defined elsewhere; presumably it
	// selects round-toward-zero -- confirm against its definition.
	FLDCW	runtimecontrolWord64trunc(SB)
	FMOVVP	F0, 4(SP)			// store as 8-byte integer at 4(SP) and pop
	FLDCW	0(SP)				// restore the saved control word
	MOVL	4(SP), AX			// low 32 bits of the converted value
	MOVL	AX, ret+8(FP)
	RET