Home | History | Annotate | Download | only in runtime
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "go_asm.h"
      6 #include "go_tls.h"
      7 #include "funcdata.h"
      8 #include "textflag.h"
      9 
     10 TEXT runtimert0_go(SB),NOSPLIT,$0
        	// Bootstrap sequence, in the order the code below performs it:
        	//   1. move SP down to a 16-byte-aligned scratch area and stash argc/argv there,
        	//   2. describe the current stack as g0's stack (guards, lo, hi),
        	//   3. record CPUID leaf 1 ECX/EDX when leaf 0 returns a non-zero AX,
        	//   4. install TLS via settls and verify that a store through it lands in m0.tls,
        	//   5. point the TLS g slot at g0 and cross-link g0 and m0,
        	//   6. call check, args, osinit and schedinit,
        	//   7. queue runtimemainPC with newproc and call mstart.
     11 	// copy arguments forward on an even stack
     12 	MOVL	argc+0(FP), AX
     13 	MOVL	argv+4(FP), BX
     14 	MOVL	SP, CX
     15 	SUBL	$128, CX		// plenty of scratch
     16 	ANDL	$~15, CX		// round down to a 16-byte boundary
     17 	MOVL	CX, SP
     18 
     19 	MOVL	AX, 16(SP)		// stash argc; reloaded before the call to args
     20 	MOVL	BX, 24(SP)		// stash argv; reloaded before the call to args
     21 
     22 	// create istack out of the given (operating system) stack.
     23 	MOVL	$runtimeg0(SB), DI
     24 	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104: used for both guards and stack.lo
     25 	MOVL	BX, g_stackguard0(DI)
     26 	MOVL	BX, g_stackguard1(DI)
     27 	MOVL	BX, (g_stack+stack_lo)(DI)
     28 	MOVL	SP, (g_stack+stack_hi)(DI)
     29 
     30 	// find out information about the processor we're on
     31 	MOVQ	$0, AX			// CPUID leaf 0
     32 	CPUID
     33 	CMPQ	AX, $0			// leaf 0 returned AX == 0: skip leaf 1
     34 	JE	nocpuinfo
     35 	MOVQ	$1, AX			// CPUID leaf 1
     36 	CPUID
     37 	MOVL	CX, runtimecpuid_ecx(SB)
     38 	MOVL	DX, runtimecpuid_edx(SB)
     39 nocpuinfo:
     40 
     41 needtls:
     42 	LEAL	runtimem0+m_tls(SB), DI	// DI = &m0.tls (presumably how settls receives its argument; confirm)
     43 	CALL	runtimesettls(SB)
     44 
     45 	// store through it, to make sure it works
     46 	get_tls(BX)
     47 	MOVQ	$0x123, g(BX)
     48 	MOVQ	runtimem0+m_tls(SB), AX
     49 	CMPQ	AX, $0x123
     50 	JEQ 2(PC)			// marker read back from m0.tls: skip the abort
     51 	MOVL	AX, 0	// abort: store to absolute address 0
     52 ok:
     53 	// set the per-goroutine and per-mach "registers"
     54 	get_tls(BX)
     55 	LEAL	runtimeg0(SB), CX
     56 	MOVL	CX, g(BX)
     57 	LEAL	runtimem0(SB), AX
     58 
     59 	// save m->g0 = g0
     60 	MOVL	CX, m_g0(AX)
     61 	// save m0 to g0->m
     62 	MOVL	AX, g_m(CX)
     63 
     64 	CLD				// convention is D is always left cleared
     65 	CALL	runtimecheck(SB)
     66 
     67 	MOVL	16(SP), AX		// copy argc
     68 	MOVL	AX, 0(SP)
     69 	MOVL	24(SP), AX		// copy argv
     70 	MOVL	AX, 4(SP)
     71 	CALL	runtimeargs(SB)
     72 	CALL	runtimeosinit(SB)
     73 	CALL	runtimeschedinit(SB)
     74 
     75 	// create a new goroutine to start program
     76 	MOVL	$runtimemainPC(SB), AX	// entry
     77 	MOVL	$0, 0(SP)		// first argument word is 0 (presumably the argument size; confirm against newproc)
     78 	MOVL	AX, 4(SP)		// second argument word is the address of runtimemainPC
     79 	CALL	runtimenewproc(SB)
     80 
     81 	// start this M
     82 	CALL	runtimemstart(SB)
     83 
     84 	MOVL	$0xf1, 0xf1  // crash: only reached if mstart returns
     85 	RET
     86 
     87 DATA	runtimemainPC+0(SB)/4,$runtimemain(SB)	// 4-byte word initialised with the address of runtimemain
     88 GLOBL	runtimemainPC(SB),RODATA,$4	// read-only, 4 bytes; rt0_go passes the address of this word to newproc
     89 
     90 TEXT runtimebreakpoint(SB),NOSPLIT,$0-0
        	// Raises the INT 3 breakpoint trap, then returns to the caller if execution resumes.
     91 	INT $3
     92 	RET
     93 
     94 TEXT runtimeasminit(SB),NOSPLIT,$0-0
        	// Empty hook: takes no arguments and returns immediately.
     95 	// No per-thread init.
     96 	RET
     97 
     98 /*
     99  *  go-routine
    100  */
    101 
    102 // void gosave(Gobuf*)
    103 // save state in Gobuf; setjmp
        // Records the caller's SP and PC and the current g in buf, zeroes buf.ret,
        // and calls badctxt when buf.ctxt is not already zero.
    104 TEXT runtimegosave(SB), NOSPLIT, $0-4
    105 	MOVL	buf+0(FP), AX	// gobuf
    106 	LEAL	buf+0(FP), BX	// caller's SP
    107 	MOVL	BX, gobuf_sp(AX)
    108 	MOVL	0(SP), BX		// caller's PC
    109 	MOVL	BX, gobuf_pc(AX)
    110 	MOVQ	$0, gobuf_ret(AX)
    111 	// Assert ctxt is zero. See func save.
    112 	MOVL	gobuf_ctxt(AX), BX
    113 	TESTL	BX, BX
    114 	JZ	2(PC)			// ctxt == 0: skip the badctxt call
    115 	CALL	runtimebadctxt(SB)
    116 	get_tls(CX)
    117 	MOVL	g(CX), BX		// BX = current g
    118 	MOVL	BX, gobuf_g(AX)
    119 	RET
    120 
    121 // void gogo(Gobuf*)
    122 // restore state from Gobuf; longjmp
        // Makes buf.g the current g, loads SP, DX (ctxt) and AX (ret) from buf,
        // clears buf.sp, buf.ret and buf.ctxt, and jumps to buf.pc. Does not return
        // to its caller.
    123 TEXT runtimegogo(SB), NOSPLIT, $8-4
    124 	MOVL	buf+0(FP), BX		// gobuf
    125 
    126 	// If ctxt is not nil, invoke deletion barrier before overwriting.
    127 	MOVL	gobuf_ctxt(BX), DX
    128 	TESTL	DX, DX
    129 	JZ	nilctxt
    130 	LEAL	gobuf_ctxt(BX), AX
    131 	MOVL	AX, 0(SP)		// first argument: address of buf.ctxt
    132 	MOVL	$0, 4(SP)		// second argument: 0
    133 	CALL	runtimewritebarrierptr_prewrite(SB)
    134 	MOVL	buf+0(FP), BX		// reload the gobuf pointer after the call
    135 
    136 nilctxt:
    137 	MOVL	gobuf_g(BX), DX
    138 	MOVL	0(DX), CX		// make sure g != nil (the loaded value is discarded; CX is overwritten next)
    139 	get_tls(CX)
    140 	MOVL	DX, g(CX)
    141 	MOVL	gobuf_sp(BX), SP	// restore SP
    142 	MOVL	gobuf_ctxt(BX), DX
    143 	MOVQ	gobuf_ret(BX), AX
    144 	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
    145 	MOVQ	$0, gobuf_ret(BX)
    146 	MOVL	$0, gobuf_ctxt(BX)
    147 	MOVL	gobuf_pc(BX), BX
    148 	JMP	BX
    149 
    150 // func mcall(fn func(*g))
    151 // Switch to m->g0's stack, call fn(g).
    152 // Fn must never return. It should gogo(&g->sched)
    153 // to keep running g.
        // The caller's PC and SP and the current g are saved in g->sched before the switch.
        // Jumps to badmcall when called on g0, and to badmcall2 if fn returns.
    154 TEXT runtimemcall(SB), NOSPLIT, $0-4
    155 	MOVL	fn+0(FP), DI
    156 
    157 	get_tls(CX)
    158 	MOVL	g(CX), AX	// save state in g->sched
    159 	MOVL	0(SP), BX	// caller's PC
    160 	MOVL	BX, (g_sched+gobuf_pc)(AX)
    161 	LEAL	fn+0(FP), BX	// caller's SP
    162 	MOVL	BX, (g_sched+gobuf_sp)(AX)
    163 	MOVL	AX, (g_sched+gobuf_g)(AX)
    164 
    165 	// switch to m->g0 & its stack, call fn
    166 	MOVL	g(CX), BX
    167 	MOVL	g_m(BX), BX
    168 	MOVL	m_g0(BX), SI
    169 	CMPL	SI, AX	// if g == m->g0 call badmcall
    170 	JNE	3(PC)		// g != g0: skip the two-instruction jump to badmcall
    171 	MOVL	$runtimebadmcall(SB), AX
    172 	JMP	AX
    173 	MOVL	SI, g(CX)	// g = m->g0
    174 	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    175 	PUSHQ	AX		// argument for fn: the g we switched away from
    176 	MOVL	DI, DX		// DX = fn value
    177 	MOVL	0(DI), DI	// DI = code address stored in fn's first word
    178 	CALL	DI
    179 	POPQ	AX
    180 	MOVL	$runtimebadmcall2(SB), AX	// reached only if fn returned
    181 	JMP	AX
    182 	RET
    183 
    184 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    185 // of the G stack. We need to distinguish the routine that
    186 // lives at the bottom of the G stack from the one that lives
    187 // at the top of the system stack because the one at the top of
    188 // the system stack terminates the stack walk (see topofstack()).
    189 TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
    190 	RET		// body is a bare return; systemstack stores this symbol's address in g->sched.pc
    191 
    192 // func systemstack(fn func())
        // Runs fn on the m->g0 stack. When the current g is gsignal or g0, fn is called
        // directly on the current stack. When it is m->curg, the goroutine's state is
        // saved in g->sched, SP is switched to g0's saved SP, fn is called, and SP is
        // switched back to curg's saved SP. Any other g leads to badsystemstack.
    193 TEXT runtimesystemstack(SB), NOSPLIT, $0-4
    194 	MOVL	fn+0(FP), DI	// DI = fn
    195 	get_tls(CX)
    196 	MOVL	g(CX), AX	// AX = g
    197 	MOVL	g_m(AX), BX	// BX = m
    198 
    199 	MOVL	m_gsignal(BX), DX	// DX = gsignal
    200 	CMPL	AX, DX
    201 	JEQ	noswitch
    202 
    203 	MOVL	m_g0(BX), DX	// DX = g0
    204 	CMPL	AX, DX
    205 	JEQ	noswitch
    206 
    207 	MOVL	m_curg(BX), R8
    208 	CMPL	AX, R8
    209 	JEQ	switch
    210 
    211 	// Not g0, not curg. Must be gsignal, but that's not allowed.
    212 	// Hide call from linker nosplit analysis.
        	// NOTE(review): nothing follows the CALL, so if badsystemstack returned,
        	// execution would fall through into switch; presumably it never returns.
    213 	MOVL	$runtimebadsystemstack(SB), AX
    214 	CALL	AX
    215 
    216 switch:
    217 	// save our state in g->sched. Pretend to
    218 	// be systemstack_switch if the G stack is scanned.
    219 	MOVL	$runtimesystemstack_switch(SB), SI
    220 	MOVL	SI, (g_sched+gobuf_pc)(AX)
    221 	MOVL	SP, (g_sched+gobuf_sp)(AX)
    222 	MOVL	AX, (g_sched+gobuf_g)(AX)
    223 
    224 	// switch to g0
    225 	MOVL	DX, g(CX)	// DX still holds g0 from the check above
    226 	MOVL	(g_sched+gobuf_sp)(DX), SP
    227 
    228 	// call target function
    229 	MOVL	DI, DX		// DX = fn value
    230 	MOVL	0(DI), DI	// DI = code address stored in fn's first word
    231 	CALL	DI
    232 
    233 	// switch back to g
    234 	get_tls(CX)
    235 	MOVL	g(CX), AX
    236 	MOVL	g_m(AX), BX
    237 	MOVL	m_curg(BX), AX
    238 	MOVL	AX, g(CX)
    239 	MOVL	(g_sched+gobuf_sp)(AX), SP
    240 	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been restored
    241 	RET
    242 
    243 noswitch:
    244 	// already on m stack, just call directly
    245 	MOVL	DI, DX		// DX = fn value
    246 	MOVL	0(DI), DI	// DI = code address stored in fn's first word
    247 	CALL	DI
    248 	RET
    249 
    250 /*
    251  * support for morestack
    252  */
    253 
    254 // Called during function prolog when more stack is needed.
    255 //
    256 // The traceback routines see morestack on a g0 as being
    257 // the top of a stack (for example, morestack calling newstack
    258 // calling the scheduler calling newm calling gc), so we must
    259 // record an argument size. For that purpose, it has no arguments.
        //
        // Saves f's caller's PC/SP and the current g in m->morebuf, saves f's own PC/SP in
        // g->sched, switches to m->g0's stack and calls newstack with the value
        // held in DX pushed as its argument.
    260 TEXT runtimemorestack(SB),NOSPLIT,$0-0
    261 	get_tls(CX)
    262 	MOVL	g(CX), BX
    263 	MOVL	g_m(BX), BX
    264 
    265 	// Cannot grow scheduler stack (m->g0).
    266 	MOVL	m_g0(BX), SI
    267 	CMPL	g(CX), SI
    268 	JNE	3(PC)		// not on g0: skip the call and the crash
    269 	CALL	runtimebadmorestackg0(SB)
    270 	MOVL	0, AX		// load from address 0: crash if the call above returns
    271 
    272 	// Cannot grow signal stack (m->gsignal).
    273 	MOVL	m_gsignal(BX), SI
    274 	CMPL	g(CX), SI
    275 	JNE	3(PC)		// not on gsignal: skip the call and the crash
    276 	CALL	runtimebadmorestackgsignal(SB)
    277 	MOVL	0, AX		// load from address 0: crash if the call above returns
    278 
    279 	// Called from f.
    280 	// Set m->morebuf to f's caller.
    281 	MOVL	8(SP), AX	// f's caller's PC
    282 	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
    283 	LEAL	16(SP), AX	// f's caller's SP
    284 	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
    285 	get_tls(CX)
    286 	MOVL	g(CX), SI
    287 	MOVL	SI, (m_morebuf+gobuf_g)(BX)
    288 
    289 	// Set g->sched to context in f.
    290 	MOVL	0(SP), AX // f's PC
    291 	MOVL	AX, (g_sched+gobuf_pc)(SI)
    292 	MOVL	SI, (g_sched+gobuf_g)(SI)
    293 	LEAL	8(SP), AX // f's SP
    294 	MOVL	AX, (g_sched+gobuf_sp)(SI)
    295 	// newstack will fill gobuf.ctxt.
    296 
    297 	// Call newstack on m->g0's stack.
    298 	MOVL	m_g0(BX), BX
    299 	MOVL	BX, g(CX)
    300 	MOVL	(g_sched+gobuf_sp)(BX), SP
    301 	PUSHQ	DX	// ctxt argument
    302 	CALL	runtimenewstack(SB)
    303 	MOVL	$0, 0x1003	// crash if newstack returns
    304 	POPQ	DX	// keep balance check happy
    305 	RET
    306 
    307 // morestack trampolines
        // morestack_noctxt zeroes DX, the register morestack pushes as newstack's ctxt
        // argument, and then jumps to morestack.
    308 TEXT runtimemorestack_noctxt(SB),NOSPLIT,$0
    309 	MOVL	$0, DX
    310 	JMP	runtimemorestack(SB)
    311 
    312 TEXT runtimestackBarrier(SB),NOSPLIT,$0
    313 	// We came here via a RET to an overwritten return PC.
    314 	// AX may be live. Other registers are available.
    315 
    316 	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    317 	get_tls(CX)
    318 	MOVL	g(CX), CX
    319 	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base address of g.stkbar
    320 	MOVL	g_stkbarPos(CX), BX		// BX = g.stkbarPos
    321 	IMULL	$stkbar__size, BX	// Too big for SIB.
    322 	ADDL	DX, BX			// BX = address of g.stkbar[g.stkbarPos]
    323 	MOVL	stkbar_savedLRVal(BX), BX
    324 	// Record that this stack barrier was hit.
    325 	ADDL	$1, g_stkbarPos(CX)
    326 	// Jump to the original return PC.
    327 	JMP	BX
    328 
    329 // reflectcall: call a function with the given argument list
    330 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    331 // we don't have variable-sized frames, so we use a small number
    332 // of constant-sized-frame functions to encode a few bits of size in the pc.
    333 // Caution: ugly multiline assembly macros in your future!
    334 
        // DISPATCH(NAME, MAXSIZE): with the argument size in CX, jump to NAME when
        // CX <= MAXSIZE (unsigned). Otherwise JA 3(PC) skips the MOVL/JMP pair and
        // execution continues with whatever follows the macro. Clobbers AX when it jumps.
    335 #define DISPATCH(NAME,MAXSIZE)		\
    336 	CMPL	CX, $MAXSIZE;		\
    337 	JA	3(PC);			\
    338 	MOVL	$NAME(SB), AX;		\
    339 	JMP	AX
    340 // Note: can't just "JMP NAME(SB)" - bad inlining results.
    341 
    342 TEXT reflectcall(SB), NOSPLIT, $0-0
        	// Trampoline: a single unconditional jump.
        	// NOTE(review): as spelled here, this symbol has the same name as the dispatcher
        	// defined immediately below, and the JMP targets that same name. The two names
        	// presumably differed by a package qualifier that is not visible in this text;
        	// confirm against the upstream file before assembling.
    343 	JMP	reflectcall(SB)
    344 
    345 TEXT reflectcall(SB), NOSPLIT, $0-20
        	// Loads argsize zero-extended into CX and walks the DISPATCH list in increasing
        	// size order, so control transfers to the first call stub whose frame size is
        	// >= argsize. Sizes above 1073741824 fall through to badreflectcall.
    346 	MOVLQZX argsize+12(FP), CX
    347 	DISPATCH(runtimecall16, 16)
    348 	DISPATCH(runtimecall32, 32)
    349 	DISPATCH(runtimecall64, 64)
    350 	DISPATCH(runtimecall128, 128)
    351 	DISPATCH(runtimecall256, 256)
    352 	DISPATCH(runtimecall512, 512)
    353 	DISPATCH(runtimecall1024, 1024)
    354 	DISPATCH(runtimecall2048, 2048)
    355 	DISPATCH(runtimecall4096, 4096)
    356 	DISPATCH(runtimecall8192, 8192)
    357 	DISPATCH(runtimecall16384, 16384)
    358 	DISPATCH(runtimecall32768, 32768)
    359 	DISPATCH(runtimecall65536, 65536)
    360 	DISPATCH(runtimecall131072, 131072)
    361 	DISPATCH(runtimecall262144, 262144)
    362 	DISPATCH(runtimecall524288, 524288)
    363 	DISPATCH(runtimecall1048576, 1048576)
    364 	DISPATCH(runtimecall2097152, 2097152)
    365 	DISPATCH(runtimecall4194304, 4194304)
    366 	DISPATCH(runtimecall8388608, 8388608)
    367 	DISPATCH(runtimecall16777216, 16777216)
    368 	DISPATCH(runtimecall33554432, 33554432)
    369 	DISPATCH(runtimecall67108864, 67108864)
    370 	DISPATCH(runtimecall134217728, 134217728)
    371 	DISPATCH(runtimecall268435456, 268435456)
    372 	DISPATCH(runtimecall536870912, 536870912)
    373 	DISPATCH(runtimecall1073741824, 1073741824)
    374 	MOVL	$runtimebadreflectcall(SB), AX	// no stub is large enough
    375 	JMP	AX
    376 
    377 #define CALLFN(NAME,MAXSIZE)			\
    378 TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
    379 	NO_LOCAL_POINTERS;			\
    380 	/* copy arguments to stack: argsize bytes from argptr to the bottom of this frame (SP) */		\
    381 	MOVL	argptr+8(FP), SI;		\
    382 	MOVL	argsize+12(FP), CX;		\
    383 	MOVL	SP, DI;				\
    384 	REP;MOVSB;				\
    385 	/* call function: DX = f, AX = code address stored in f's first word */			\
    386 	MOVL	f+4(FP), DX;			\
    387 	MOVL	(DX), AX;			\
    388 	CALL	AX;				\
    389 	/* copy return values back: skip the first retoffset bytes on both sides, then hand DX=argtype, DI=argptr+retoffset, SI=SP+retoffset, CX=argsize-retoffset to callRet */		\
    390 	MOVL	argtype+0(FP), DX;		\
    391 	MOVL	argptr+8(FP), DI;		\
    392 	MOVL	argsize+12(FP), CX;		\
    393 	MOVL	retoffset+16(FP), BX;		\
    394 	MOVL	SP, SI;				\
    395 	ADDL	BX, DI;				\
    396 	ADDL	BX, SI;				\
    397 	SUBL	BX, CX;				\
    398 	CALL	callRet<>(SB);			\
    399 	RET
    400 
    401 // callRet copies return values back at the end of call*. This is a
    402 // separate function so it can allocate stack space for the arguments
    403 // to reflectcallmove. It does not follow the Go ABI; it expects its
    404 // arguments in registers.
        // As set up by CALLFN: DX = argtype, DI = argptr+retoffset,
        // SI = callee frame base + retoffset, CX = argsize-retoffset.
    405 TEXT callRet<>(SB), NOSPLIT, $16-0
    406 	MOVL	DX, 0(SP)	// 1st stack argument: argtype
    407 	MOVL	DI, 4(SP)	// 2nd: address in the caller's argument buffer
    408 	MOVL	SI, 8(SP)	// 3rd: address in the call stub's frame
    409 	MOVL	CX, 12(SP)	// 4th: byte count
    410 	CALL	runtimereflectcallmove(SB)
    411 	RET
    412 
    413 CALLFN(call16, 16)	// one fixed-frame stub per power of two from 16 to 1073741824 bytes, the same sizes the DISPATCH list tests
    414 CALLFN(call32, 32)
    415 CALLFN(call64, 64)
    416 CALLFN(call128, 128)
    417 CALLFN(call256, 256)
    418 CALLFN(call512, 512)
    419 CALLFN(call1024, 1024)
    420 CALLFN(call2048, 2048)
    421 CALLFN(call4096, 4096)
    422 CALLFN(call8192, 8192)
    423 CALLFN(call16384, 16384)
    424 CALLFN(call32768, 32768)
    425 CALLFN(call65536, 65536)
    426 CALLFN(call131072, 131072)
    427 CALLFN(call262144, 262144)
    428 CALLFN(call524288, 524288)
    429 CALLFN(call1048576, 1048576)
    430 CALLFN(call2097152, 2097152)
    431 CALLFN(call4194304, 4194304)
    432 CALLFN(call8388608, 8388608)
    433 CALLFN(call16777216, 16777216)
    434 CALLFN(call33554432, 33554432)
    435 CALLFN(call67108864, 67108864)
    436 CALLFN(call134217728, 134217728)
    437 CALLFN(call268435456, 268435456)
    438 CALLFN(call536870912, 536870912)
    439 CALLFN(call1073741824, 1073741824)
    440 
    441 TEXT runtimeprocyield(SB),NOSPLIT,$0-0
        	// Executes PAUSE once per iteration, counting the 32-bit cycles argument down to zero.
        	// The decrement happens before the test, so cycles == 0 wraps and loops 2^32 times.
        	// NOTE(review): the frame is declared $0-0 although a 4-byte argument is read at
        	// cycles+0(FP); confirm whether the declared argument size is intentional.
    442 	MOVL	cycles+0(FP), AX
    443 again:
    444 	PAUSE
    445 	SUBL	$1, AX
    446 	JNZ	again
    447 	RET
    448 
    449 TEXT publicationBarrier(SB),NOSPLIT,$0-0
        	// Emits no instructions besides RET.
    450 	// Stores are already ordered on x86, so this is just a
    451 	// compile barrier.
    452 	RET
    453 
    454 // void jmpdefer(fn, sp);
    455 // called from deferreturn.
    456 // 1. pop the caller
    457 // 2. sub 5 bytes from the callers return
    458 // 3. jmp to the argument
        // fv is the function value to run (its code address is read from its first word);
        // argp is the stack address from which the caller's post-CALL SP is derived.
    459 TEXT runtimejmpdefer(SB), NOSPLIT, $0-8
    460 	MOVL	fv+0(FP), DX
    461 	MOVL	argp+4(FP), BX
    462 	LEAL	-8(BX), SP	// caller sp after CALL
    463 	SUBL	$5, (SP)	// return to CALL again (5 is presumably the length of that CALL instruction; confirm)
    464 	MOVL	0(DX), BX
    465 	JMP	BX	// but first run the deferred function
    466 
    467 // func asmcgocall(fn, arg unsafe.Pointer) int32
    468 // Not implemented.
    469 TEXT runtimeasmcgocall(SB),NOSPLIT,$0-12
    470 	MOVL	0, AX	// load from absolute address 0, the same idiom this file uses elsewhere to crash
    471 	RET
    472 
    473 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
    474 // Not implemented.
    475 TEXT runtimecgocallback(SB),NOSPLIT,$0-16
    476 	MOVL	0, AX	// load from absolute address 0, the same idiom this file uses elsewhere to crash
    477 	RET
    478 
    479 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
    480 // Not implemented.
    481 TEXT cgocallback_gofunc(SB),NOSPLIT,$0-16
    482 	MOVL	0, AX	// load from absolute address 0, the same idiom this file uses elsewhere to crash
    483 	RET
    484 
    485 // void setg(G*); set g. for use by needm.
    486 // Not implemented.
    487 TEXT runtimesetg(SB), NOSPLIT, $0-4
    488 	MOVL	0, AX	// load from absolute address 0, the same idiom this file uses elsewhere to crash
    489 	RET
    490 
    491 // check that SP is in range [g->stack.lo, g->stack.hi)
        // Each failed comparison falls into a load from address 0.
        // NOTE(review): the second test is SP > stack.lo (unsigned), so SP == stack.lo is
        // rejected even though the range above is written as closed at lo.
    492 TEXT runtimestackcheck(SB), NOSPLIT, $0-0
    493 	get_tls(CX)
    494 	MOVL	g(CX), AX
    495 	CMPL	(g_stack+stack_hi)(AX), SP
    496 	JHI	2(PC)		// stack.hi > SP: skip the crash
    497 	MOVL	0, AX		// crash: SP is at or above stack.hi
    498 	CMPL	SP, (g_stack+stack_lo)(AX)
    499 	JHI	2(PC)		// SP > stack.lo: skip the crash
    500 	MOVL	0, AX		// crash: SP is at or below stack.lo
    501 	RET
    502 
    503 TEXT runtimememclrNoHeapPointers(SB),NOSPLIT,$0-8
        	// Zeroes n bytes starting at ptr: n/4 four-byte stores followed by n%4 byte stores.
    504 	MOVL	ptr+0(FP), DI
    505 	MOVL	n+4(FP), CX
    506 	MOVQ	CX, BX
    507 	ANDQ	$3, BX		// BX = n % 4: tail byte count
    508 	SHRQ	$2, CX		// CX = n / 4: dword count
    509 	MOVQ	$0, AX		// value to store
    510 	CLD			// string instructions advance forward
    511 	REP
    512 	STOSL
    513 	MOVQ	BX, CX
    514 	REP
    515 	STOSB
    516 	// Note: we zero only 4 bytes at a time so that the tail is at most
    517 	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
    518 	// See issue 13160.
    519 	RET
    520 
    521 TEXT runtimegetcallerpc(SB),NOSPLIT,$8-12
        	// Returns the return PC stored 8 bytes below the address passed in argp. When that
        	// slot holds stackBarrierPC, the result of nextBarrierPC is returned instead.
    522 	MOVL	argp+0(FP),AX		// addr of first arg
    523 	MOVL	-8(AX),AX		// get calling pc
    524 	CMPL	AX, runtimestackBarrierPC(SB)
    525 	JNE	nobar
    526 	// Get original return PC.
    527 	CALL	runtimenextBarrierPC(SB)
    528 	MOVL	0(SP), AX		// result of nextBarrierPC, read from the bottom of this frame
    529 nobar:
    530 	MOVL	AX, ret+8(FP)
    531 	RET
    532 
    533 TEXT runtimesetcallerpc(SB),NOSPLIT,$8-8
        	// Overwrites the return PC stored 8 bytes below the address passed in argp with pc.
        	// When that slot currently holds stackBarrierPC, pc is passed to setNextBarrierPC
        	// instead and the slot is left untouched.
    534 	MOVL	argp+0(FP),AX		// addr of first arg
    535 	MOVL	pc+4(FP), BX		// pc to set
    536 	MOVL	-8(AX), CX
    537 	CMPL	CX, runtimestackBarrierPC(SB)
    538 	JEQ	setbar
    539 	MOVQ	BX, -8(AX)		// set calling pc
    540 	RET
    541 setbar:
    542 	// Set the stack barrier return PC.
    543 	MOVL	BX, 0(SP)		// argument for setNextBarrierPC
    544 	CALL	runtimesetNextBarrierPC(SB)
    545 	RET
    546 
    547 // int64 runtimecputicks(void)
        // Returns the 64-bit time-stamp counter read by RDTSC.
        // NOTE(review): the frame is declared $0-0 although an 8-byte result is written at
        // ret+0(FP); confirm whether the declared argument size is intentional.
    548 TEXT runtimecputicks(SB),NOSPLIT,$0-0
    549 	RDTSC			// low 32 bits in AX, high 32 bits in DX
    550 	SHLQ	$32, DX
    551 	ADDQ	DX, AX		// AX = high<<32 + low
    552 	MOVQ	AX, ret+0(FP)
    553 	RET
    554 
    555 // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    556 // redirects to memhash(p, h, size) using the size
    557 // stored in the closure.
    558 TEXT runtimememhash_varlen(SB),NOSPLIT,$24-12
    559 	GO_ARGS
    560 	NO_LOCAL_POINTERS
    561 	MOVL	p+0(FP), AX
    562 	MOVL	h+4(FP), BX
    563 	MOVL	4(DX), CX		// size: read from offset 4 of the closure addressed by DX
    564 	MOVL	AX, 0(SP)		// memhash argument 1: p
    565 	MOVL	BX, 4(SP)		// memhash argument 2: h
    566 	MOVL	CX, 8(SP)		// memhash argument 3: size
    567 	CALL	runtimememhash(SB)
    568 	MOVL	16(SP), AX		// memhash result slot
    569 	MOVL	AX, ret+8(FP)
    570 	RET
    571 
    572 // hash function using AES hardware instructions
    573 // For now, our one amd64p32 system (NaCl) does not
    574 // support using AES instructions, so we have not bothered to
    575 // write the implementations. We can copy and adjust the ones
    576 // in asm_amd64.s when the time comes.
    577 
    578 TEXT runtimeaeshash(SB),NOSPLIT,$0-20
        	// Placeholder: stores whatever AX currently holds as the result; no hashing is done.
    579 	MOVL	AX, ret+16(FP)
    580 	RET
    581 
    582 TEXT runtimeaeshashstr(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX currently holds as the result; no hashing is done.
    583 	MOVL	AX, ret+8(FP)
    584 	RET
    585 
    586 TEXT runtimeaeshash32(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX currently holds as the result; no hashing is done.
    587 	MOVL	AX, ret+8(FP)
    588 	RET
    589 
    590 TEXT runtimeaeshash64(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX currently holds as the result; no hashing is done.
    591 	MOVL	AX, ret+8(FP)
    592 	RET
    593 
    594 // memequal(p, q unsafe.Pointer, size uintptr) bool
        // Returns true immediately when both pointers are identical; otherwise the
        // comparison is delegated to memeqbody (SI = a, DI = b, BX = size).
    595 TEXT runtimememequal(SB),NOSPLIT,$0-17
    596 	MOVL	a+0(FP), SI
    597 	MOVL	b+4(FP), DI
    598 	CMPL	SI, DI
    599 	JEQ	eq
    600 	MOVL	size+8(FP), BX
    601 	CALL	runtimememeqbody(SB)
    602 	MOVB	AX, ret+16(FP)		// low byte of AX carries memeqbody's 0/1 result
    603 	RET
    604 eq:
    605 	MOVB    $1, ret+16(FP)
    606 	RET
    607 
    608 // memequal_varlen(a, b unsafe.Pointer) bool
        // Same as memequal, except that the byte count is read from the closure
        // addressed by DX rather than passed as an argument.
    609 TEXT runtimememequal_varlen(SB),NOSPLIT,$0-9
    610 	MOVL    a+0(FP), SI
    611 	MOVL    b+4(FP), DI
    612 	CMPL    SI, DI
    613 	JEQ     eq
    614 	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
    615 	CALL    runtimememeqbody(SB)
    616 	MOVB    AX, ret+8(FP)    // low byte of AX carries memeqbody's 0/1 result
    617 	RET
    618 eq:
    619 	MOVB    $1, ret+8(FP)
    620 	RET
    621 
    622 // eqstring tests whether two strings are equal.
    623 // The compiler guarantees that strings passed
    624 // to eqstring have equal length.
    625 // See runtime_test.go:eqstring_generic for
    626 // equivalent Go code.
        // Only s1's length is read; s2's length is never loaded.
    627 TEXT runtimeeqstring(SB),NOSPLIT,$0-17
    628 	MOVL	s1_base+0(FP), SI
    629 	MOVL	s2_base+8(FP), DI
    630 	CMPL	SI, DI
    631 	JEQ	same			// identical data pointers: equal without comparing bytes
    632 	MOVL	s1_len+4(FP), BX
    633 	CALL	runtimememeqbody(SB)
    634 	MOVB	AX, ret+16(FP)		// low byte of AX carries memeqbody's 0/1 result
    635 	RET
    636 same:
    637 	MOVB	$1, ret+16(FP)
    638 	RET
    639 
    640 // a in SI
    641 // b in DI
    642 // count in BX
        // Result in AX: 1 when the first count bytes at a and b are equal, 0 otherwise.
        // Register-only calling convention; clobbers BX, CX, DX, SI, DI and X0-X7.
    643 TEXT runtimememeqbody(SB),NOSPLIT,$0-0
    644 	XORQ	AX, AX			// default result 0, returned by the early RETs below on a mismatch
    645 
    646 	CMPQ	BX, $8
    647 	JB	small
    648 
    649 	// 64 bytes at a time using xmm registers
    650 hugeloop:
    651 	CMPQ	BX, $64
    652 	JB	bigloop
    653 	MOVOU	(SI), X0
    654 	MOVOU	(DI), X1
    655 	MOVOU	16(SI), X2
    656 	MOVOU	16(DI), X3
    657 	MOVOU	32(SI), X4
    658 	MOVOU	32(DI), X5
    659 	MOVOU	48(SI), X6
    660 	MOVOU	48(DI), X7
    661 	PCMPEQB	X1, X0
    662 	PCMPEQB	X3, X2
    663 	PCMPEQB	X5, X4
    664 	PCMPEQB	X7, X6
    665 	PAND	X2, X0
    666 	PAND	X6, X4
    667 	PAND	X4, X0			// X0 = AND of the four byte-equality masks
    668 	PMOVMSKB X0, DX
    669 	ADDQ	$64, SI
    670 	ADDQ	$64, DI
    671 	SUBQ	$64, BX
    672 	CMPL	DX, $0xffff		// all 16 mask bits set means all 64 bytes matched
    673 	JEQ	hugeloop
    674 	RET
    675 
    676 	// 8 bytes at a time using 64-bit register
    677 bigloop:
    678 	CMPQ	BX, $8
    679 	JBE	leftover
    680 	MOVQ	(SI), CX
    681 	MOVQ	(DI), DX
    682 	ADDQ	$8, SI
    683 	ADDQ	$8, DI
    684 	SUBQ	$8, BX
    685 	CMPQ	CX, DX
    686 	JEQ	bigloop
    687 	RET
    688 
    689 	// remaining 0-8 bytes
        	// Compares the 8 bytes that end exactly at the end of each buffer; these may
        	// overlap bytes already compared, which is harmless.
    690 leftover:
    691 	ADDQ	BX, SI
    692 	ADDQ	BX, DI
    693 	MOVQ	-8(SI), CX
    694 	MOVQ	-8(DI), DX
    695 	CMPQ	CX, DX
    696 	SETEQ	AX
    697 	RET
    698 
    699 small:
    700 	CMPQ	BX, $0
    701 	JEQ	equal			// count 0: ZF is set, so SETEQ at equal yields 1
    702 
    703 	LEAQ	0(BX*8), CX		// CX = count in bits
    704 	NEGQ	CX			// shift amount: 64 - bits (mod 64)
    705 
    706 	CMPB	SI, $0xf8		// compares only the low byte of the address
    707 	JA	si_high
    708 
    709 	// load at SI won't cross a page boundary.
    710 	MOVQ	(SI), SI
    711 	JMP	si_finish
    712 si_high:
    713 	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
    714 	MOVQ	BX, DX
    715 	ADDQ	SI, DX
    716 	MOVQ	-8(DX), SI
    717 	SHRQ	CX, SI
    718 si_finish:
    719 
    720 	// same for DI.
    721 	CMPB	DI, $0xf8
    722 	JA	di_high
    723 	MOVQ	(DI), DI
    724 	JMP	di_finish
    725 di_high:
    726 	MOVQ	BX, DX
    727 	ADDQ	DI, DX
    728 	MOVQ	-8(DX), DI
    729 	SHRQ	CX, DI
    730 di_finish:
    731 
    732 	SUBQ	SI, DI
    733 	SHLQ	CX, DI			// drop the bytes beyond count; ZF is set when the wanted bytes match
    734 equal:
    735 	SETEQ	AX
    736 	RET
    737 
    738 TEXT runtimecmpstring(SB),NOSPLIT,$0-20
        	// Loads both strings' data pointers and lengths into the registers cmpbody expects
        	// (SI/BX for s1, DI/DX for s2) and stores cmpbody's AX result.
    739 	MOVL	s1_base+0(FP), SI
    740 	MOVL	s1_len+4(FP), BX
    741 	MOVL	s2_base+8(FP), DI
    742 	MOVL	s2_len+12(FP), DX
    743 	CALL	runtimecmpbody(SB)
    744 	MOVL	AX, ret+16(FP)
    745 	RET
    746 
    747 TEXT bytesCompare(SB),NOSPLIT,$0-28
        	// Loads the pointer and length words of both arguments (offsets 0/4 and 12/16) into
        	// the registers cmpbody expects and stores cmpbody's AX result.
    748 	MOVL	s1+0(FP), SI
    749 	MOVL	s1+4(FP), BX
    750 	MOVL	s2+12(FP), DI
    751 	MOVL	s2+16(FP), DX
    752 	CALL	runtimecmpbody(SB)
    753 	MOVL	AX, res+24(FP)
    754 	RET
    755 
    756 // input:
    757 //   SI = a
    758 //   DI = b
    759 //   BX = alen
    760 //   DX = blen
    761 // output:
    762 //   AX = 1/0/-1
        // Compares the first min(alen, blen) bytes; when they are all equal, or when a and b
        // are the same address, the result is decided by comparing alen with blen.
        // Clobbers BX, CX, SI, DI, R8, X0 and X1.
    763 TEXT runtimecmpbody(SB),NOSPLIT,$0-0
    764 	CMPQ	SI, DI
    765 	JEQ	allsame
    766 	CMPQ	BX, DX
    767 	MOVQ	DX, R8
    768 	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
    769 	CMPQ	R8, $8
    770 	JB	small
    771 
    772 loop:
    773 	CMPQ	R8, $16
    774 	JBE	_0through16
    775 	MOVOU	(SI), X0
    776 	MOVOU	(DI), X1
    777 	PCMPEQB X0, X1
    778 	PMOVMSKB X1, AX
    779 	XORQ	$0xffff, AX	// convert EQ to NE
    780 	JNE	diff16	// branch if at least one byte is not equal
    781 	ADDQ	$16, SI
    782 	ADDQ	$16, DI
    783 	SUBQ	$16, R8
    784 	JMP	loop
    785 
    786 	// AX = bit mask of differences
    787 diff16:
    788 	BSFQ	AX, BX	// index of first byte that differs
    789 	XORQ	AX, AX
    790 	ADDQ	BX, SI
    791 	MOVB	(SI), CX
    792 	ADDQ	BX, DI
    793 	CMPB	CX, (DI)
    794 	SETHI	AX
    795 	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
    796 	RET
    797 
    798 	// 0 through 16 bytes left, alen>=8, blen>=8
    799 _0through16:
    800 	CMPQ	R8, $8
    801 	JBE	_0through8
    802 	MOVQ	(SI), AX
    803 	MOVQ	(DI), CX
    804 	CMPQ	AX, CX
    805 	JNE	diff8
    806 _0through8:
        	// compare the 8 bytes that end at the end of the common prefix
    807 	ADDQ	R8, SI
    808 	ADDQ	R8, DI
    809 	MOVQ	-8(SI), AX
    810 	MOVQ	-8(DI), CX
    811 	CMPQ	AX, CX
    812 	JEQ	allsame
    813 
    814 	// AX and CX contain parts of a and b that differ.
    815 diff8:
    816 	BSWAPQ	AX	// reverse order of bytes
    817 	BSWAPQ	CX
    818 	XORQ	AX, CX
    819 	BSRQ	CX, CX	// index of highest bit difference
    820 	SHRQ	CX, AX	// move a's bit to bottom
    821 	ANDQ	$1, AX	// mask bit
    822 	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
    823 	RET
    824 
    825 	// 0-7 bytes in common
    826 small:
    827 	LEAQ	(R8*8), CX	// bytes left -> bits left
    828 	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
    829 	JEQ	allsame		// zero bytes in common: decide on the lengths alone
    830 
    831 	// load bytes of a into high bytes of SI
    832 	CMPB	SI, $0xf8
    833 	JA	si_high
    834 	MOVQ	(SI), SI
    835 	JMP	si_finish
    836 si_high:
    837 	ADDQ	R8, SI
    838 	MOVQ	-8(SI), SI
    839 	SHRQ	CX, SI
    840 si_finish:
    841 	SHLQ	CX, SI
    842 
    843 	// load bytes of b into high bytes of DI
    844 	CMPB	DI, $0xf8
    845 	JA	di_high
    846 	MOVQ	(DI), DI
    847 	JMP	di_finish
    848 di_high:
    849 	ADDQ	R8, DI
    850 	MOVQ	-8(DI), DI
    851 	SHRQ	CX, DI
    852 di_finish:
    853 	SHLQ	CX, DI
    854 
    855 	BSWAPQ	SI	// reverse order of bytes
    856 	BSWAPQ	DI
    857 	XORQ	SI, DI	// find bit differences
    858 	JEQ	allsame
    859 	BSRQ	DI, CX	// index of highest bit difference
    860 	SHRQ	CX, SI	// move a's bit to bottom
    861 	ANDQ	$1, SI	// mask bit
    862 	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
    863 	RET
    864 
    865 allsame:
    866 	XORQ	AX, AX
    867 	XORQ	CX, CX
    868 	CMPQ	BX, DX
    869 	SETGT	AX	// 1 if alen > blen
    870 	SETEQ	CX	// 1 if alen == blen
    871 	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
    872 	RET
    873 
    874 TEXT bytesIndexByte(SB),NOSPLIT,$0-20
        	// Loads the data pointer, the length and the byte c (argument offset 12) into
        	// SI, BX and AL for indexbytebody, then stores its AX result.
    875 	MOVL s+0(FP), SI
    876 	MOVL s_len+4(FP), BX
    877 	MOVB c+12(FP), AL
    878 	CALL runtimeindexbytebody(SB)
    879 	MOVL AX, ret+16(FP)
    880 	RET
    881 
    882 TEXT stringsIndexByte(SB),NOSPLIT,$0-20
        	// Loads the data pointer, the length and the byte c (argument offset 8) into
        	// SI, BX and AL for indexbytebody, then stores its AX result.
    883 	MOVL s+0(FP), SI
    884 	MOVL s_len+4(FP), BX
    885 	MOVB c+8(FP), AL
    886 	CALL runtimeindexbytebody(SB)
    887 	MOVL AX, ret+16(FP)
    888 	RET
    889 
    890 // input:
    891 //   SI: data
    892 //   BX: data len
    893 //   AL: byte sought
    894 // output:
    895 //   AX
        //
        // AX receives the index of the first byte equal to AL, or -1 when there is none.
        // Buffers shorter than 16 bytes are scanned with REPN SCASB. Longer buffers are
        // scanned in three phases: SCASB up to the first 16-byte boundary, 16-byte SSE
        // compares up to the last 16-byte boundary, then SCASB over the remaining tail.
        // Clobbers CX, DX, DI, R11, X0 and X1.
    896 TEXT runtimeindexbytebody(SB),NOSPLIT,$0
    897 	MOVL SI, DI
    898 
    899 	CMPL BX, $16
    900 	JLT small
    901 
    902 	// round up to first 16-byte boundary
    903 	TESTL $15, SI
    904 	JZ aligned
    905 	MOVL SI, CX
    906 	ANDL $~15, CX
    907 	ADDL $16, CX
    908 
    909 	// search the beginning
    910 	SUBL SI, CX
    911 	REPN; SCASB
    912 	JZ success
    913 
    914 // DI is 16-byte aligned; get ready to search using SSE instructions
    915 aligned:
    916 	// round down to last 16-byte boundary
    917 	MOVL BX, R11
    918 	ADDL SI, R11
    919 	ANDL $~15, R11
    920 
    921 	// shuffle X0 around so that each byte contains c
    922 	MOVD AX, X0
    923 	PUNPCKLBW X0, X0
    924 	PUNPCKLBW X0, X0
    925 	PSHUFL $0, X0, X0
    926 	JMP condition
    927 
    928 sse:
    929 	// move the next 16-byte chunk of the buffer into X1
    930 	MOVO (DI), X1
    931 	// compare bytes in X0 to X1
    932 	PCMPEQB X0, X1
    933 	// take the top bit of each byte in X1 and put the result in DX
    934 	PMOVMSKB X1, DX
    935 	TESTL DX, DX
    936 	JNZ ssesuccess
    937 	ADDL $16, DI
    938 
    939 condition:
        	// DI and R11 are addresses, so they must be compared as unsigned values.
        	// A signed test would skip this loop for a buffer that straddles 0x80000000
        	// and leave DI far short of R11 for the tail scan below.
    940 	CMPL DI, R11
    941 	JB sse
    942 
    943 	// search the end
    944 	MOVL SI, CX
    945 	ADDL BX, CX
    946 	SUBL R11, CX
    947 	// if CX == 0, the zero flag will be set and we'll end up
    948 	// returning a false success
    949 	JZ failure
    950 	REPN; SCASB
    951 	JZ success
    952 
    953 failure:
    954 	MOVL $-1, AX
    955 	RET
    956 
    957 // handle for lengths < 16
    958 small:
    959 	MOVL BX, CX
    960 	REPN; SCASB
    961 	JZ success
    962 	MOVL $-1, AX
    963 	RET
    964 
    965 // we've found the chunk containing the byte
    966 // now just figure out which specific byte it is
    967 ssesuccess:
    968 	// get the index of the least significant set bit
    969 	BSFW DX, DX
    970 	SUBL SI, DI		// DI = offset of this chunk from the start of the data
    971 	ADDL DI, DX
    972 	MOVL DX, AX
    973 	RET
    974 
    975 success:
        	// SCASB leaves DI one past the matching byte
    976 	SUBL SI, DI
    977 	SUBL $1, DI
    978 	MOVL DI, AX
    979 	RET
    980 
    981 TEXT bytesEqual(SB),NOSPLIT,$0-25
        	// Returns 0 when the two lengths differ; otherwise returns memeqbody's result for
        	// the two data pointers with BX = the common length.
    982 	MOVL	a_len+4(FP), BX
    983 	MOVL	b_len+16(FP), CX
    984 	XORL	AX, AX			// default result: not equal
    985 	CMPL	BX, CX
    986 	JNE	eqret
    987 	MOVL	a+0(FP), SI
    988 	MOVL	b+12(FP), DI
    989 	CALL	runtimememeqbody(SB)
    990 eqret:
    991 	MOVB	AX, ret+24(FP)
    992 	RET
    993 
    994 TEXT runtimefastrand(SB), NOSPLIT, $0-4
        	// Advances the per-m state word m.fastrand and returns the new value:
        	// x += x; if the doubled value has its top bit set, x ^= 0x88888eef.
    995 	get_tls(CX)
    996 	MOVL	g(CX), AX
    997 	MOVL	g_m(AX), AX
    998 	MOVL	m_fastrand(AX), DX
    999 	ADDL	DX, DX
   1000 	MOVL	DX, BX			// BX = doubled value without the XOR
   1001 	XORL	$0x88888eef, DX
        	// The constant has its top bit set, so a negative XOR result means the doubled
        	// value was non-negative; in that case the un-XORed value in BX is kept.
   1002 	CMOVLMI	BX, DX
   1003 	MOVL	DX, m_fastrand(AX)
   1004 	MOVL	DX, ret+0(FP)
   1005 	RET
   1006 
   1007 TEXT runtimereturn0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns.
   1008 	MOVL	$0, AX
   1009 	RET
   1010 
   1011 // The top-most function running on a goroutine
   1012 // returns to goexit+PCQuantum.
        // The leading NOP gives the CALL a non-zero offset inside this symbol; there is
        // no RET because goexit1 is documented below as not returning.
   1013 TEXT runtimegoexit(SB),NOSPLIT,$0-0
   1014 	BYTE	$0x90	// NOP
   1015 	CALL	runtimegoexit1(SB)	// does not return
   1016 	// traceback from goexit1 must hit code range of goexit
   1017 	BYTE	$0x90	// NOP
   1018 
   1019 TEXT runtimeprefetcht0(SB),NOSPLIT,$0-4
        	// Issues PREFETCHT0 for the address passed in addr; returns nothing.
   1020 	MOVL	addr+0(FP), AX
   1021 	PREFETCHT0	(AX)
   1022 	RET
   1023 
   1024 TEXT runtimeprefetcht1(SB),NOSPLIT,$0-4
        	// Issues PREFETCHT1 for the address passed in addr; returns nothing.
   1025 	MOVL	addr+0(FP), AX
   1026 	PREFETCHT1	(AX)
   1027 	RET
   1028 
   1029 
   1030 TEXT runtimeprefetcht2(SB),NOSPLIT,$0-4
        	// Issues PREFETCHT2 for the address passed in addr; returns nothing.
   1031 	MOVL	addr+0(FP), AX
   1032 	PREFETCHT2	(AX)
   1033 	RET
   1034 
   1035 TEXT runtimeprefetchnta(SB),NOSPLIT,$0-4
        	// Issues PREFETCHNTA for the address passed in addr; returns nothing.
   1036 	MOVL	addr+0(FP), AX
   1037 	PREFETCHNTA	(AX)
   1038 	RET
   1039 
   1040 TEXT checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 in the one-byte result.
   1041 	MOVB	$1, ret+0(FP)
   1042 	RET
   1043