Home | History | Annotate | Download | only in runtime
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "go_asm.h"
      6 #include "go_tls.h"
      7 #include "funcdata.h"
      8 #include "textflag.h"
      9 
     10 TEXT runtimert0_go(SB),NOSPLIT,$0
     11 	// copy arguments forward on an even stack
     12 	MOVL	argc+0(FP), AX
     13 	MOVL	argv+4(FP), BX
     14 	MOVL	SP, CX
     15 	SUBL	$128, SP		// plenty of scratch
     16 	ANDL	$~15, CX
     17 	MOVL	CX, SP
     18 
     19 	MOVL	AX, 16(SP)
     20 	MOVL	BX, 24(SP)
     21 
     22 	// create istack out of the given (operating system) stack.
     23 	MOVL	$runtimeg0(SB), DI
     24 	LEAL	(-64*1024+104)(SP), BX
     25 	MOVL	BX, g_stackguard0(DI)
     26 	MOVL	BX, g_stackguard1(DI)
     27 	MOVL	BX, (g_stack+stack_lo)(DI)
     28 	MOVL	SP, (g_stack+stack_hi)(DI)
     29 
     30 	// find out information about the processor we're on
     31 	MOVQ	$0, AX
     32 	CPUID
     33 	CMPQ	AX, $0
     34 	JE	nocpuinfo
     35 	MOVQ	$1, AX
     36 	CPUID
     37 	MOVL	CX, runtimecpuid_ecx(SB)
     38 	MOVL	DX, runtimecpuid_edx(SB)
     39 nocpuinfo:
     40 
     41 needtls:
     42 	LEAL	runtimetls0(SB), DI
     43 	CALL	runtimesettls(SB)
     44 
     45 	// store through it, to make sure it works
     46 	get_tls(BX)
     47 	MOVQ	$0x123, g(BX)
     48 	MOVQ	runtimetls0(SB), AX
     49 	CMPQ	AX, $0x123
     50 	JEQ 2(PC)
     51 	MOVL	AX, 0	// abort
     52 ok:
     53 	// set the per-goroutine and per-mach "registers"
     54 	get_tls(BX)
     55 	LEAL	runtimeg0(SB), CX
     56 	MOVL	CX, g(BX)
     57 	LEAL	runtimem0(SB), AX
     58 
     59 	// save m->g0 = g0
     60 	MOVL	CX, m_g0(AX)
     61 	// save m0 to g0->m
     62 	MOVL	AX, g_m(CX)
     63 
     64 	CLD				// convention is D is always left cleared
     65 	CALL	runtimecheck(SB)
     66 
     67 	MOVL	16(SP), AX		// copy argc
     68 	MOVL	AX, 0(SP)
     69 	MOVL	24(SP), AX		// copy argv
     70 	MOVL	AX, 4(SP)
     71 	CALL	runtimeargs(SB)
     72 	CALL	runtimeosinit(SB)
     73 	CALL	runtimeschedinit(SB)
     74 
     75 	// create a new goroutine to start program
     76 	MOVL	$runtimemainPC(SB), AX	// entry
     77 	MOVL	$0, 0(SP)
     78 	MOVL	AX, 4(SP)
     79 	CALL	runtimenewproc(SB)
     80 
     81 	// start this M
     82 	CALL	runtimemstart(SB)
     83 
     84 	MOVL	$0xf1, 0xf1  // crash
     85 	RET
     86 
     87 DATA	runtimemainPC+0(SB)/4,$runtimemain(SB)
     88 GLOBL	runtimemainPC(SB),RODATA,$4
     89 
     90 TEXT runtimebreakpoint(SB),NOSPLIT,$0-0
     91 	INT $3
     92 	RET
     93 
     94 TEXT runtimeasminit(SB),NOSPLIT,$0-0
     95 	// No per-thread init.
     96 	RET
     97 
     98 /*
     99  *  go-routine
    100  */
    101 
    102 // void gosave(Gobuf*)
    103 // save state in Gobuf; setjmp
    104 TEXT runtimegosave(SB), NOSPLIT, $0-4
    105 	MOVL	buf+0(FP), AX	// gobuf
    106 	LEAL	buf+0(FP), BX	// caller's SP
    107 	MOVL	BX, gobuf_sp(AX)
    108 	MOVL	0(SP), BX		// caller's PC
    109 	MOVL	BX, gobuf_pc(AX)
    110 	MOVL	$0, gobuf_ctxt(AX)
    111 	MOVQ	$0, gobuf_ret(AX)
    112 	get_tls(CX)
    113 	MOVL	g(CX), BX
    114 	MOVL	BX, gobuf_g(AX)
    115 	RET
    116 
    117 // void gogo(Gobuf*)
    118 // restore state from Gobuf; longjmp
    119 TEXT runtimegogo(SB), NOSPLIT, $0-4
    120 	MOVL	buf+0(FP), BX		// gobuf
    121 	MOVL	gobuf_g(BX), DX
    122 	MOVL	0(DX), CX		// make sure g != nil
    123 	get_tls(CX)
    124 	MOVL	DX, g(CX)
    125 	MOVL	gobuf_sp(BX), SP	// restore SP
    126 	MOVL	gobuf_ctxt(BX), DX
    127 	MOVQ	gobuf_ret(BX), AX
    128 	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
    129 	MOVQ	$0, gobuf_ret(BX)
    130 	MOVL	$0, gobuf_ctxt(BX)
    131 	MOVL	gobuf_pc(BX), BX
    132 	JMP	BX
    133 
    134 // func mcall(fn func(*g))
    135 // Switch to m->g0's stack, call fn(g).
    136 // Fn must never return.  It should gogo(&g->sched)
    137 // to keep running g.
    138 TEXT runtimemcall(SB), NOSPLIT, $0-4
    139 	MOVL	fn+0(FP), DI
    140 
    141 	get_tls(CX)
    142 	MOVL	g(CX), AX	// save state in g->sched
    143 	MOVL	0(SP), BX	// caller's PC
    144 	MOVL	BX, (g_sched+gobuf_pc)(AX)
    145 	LEAL	fn+0(FP), BX	// caller's SP
    146 	MOVL	BX, (g_sched+gobuf_sp)(AX)
    147 	MOVL	AX, (g_sched+gobuf_g)(AX)
    148 
    149 	// switch to m->g0 & its stack, call fn
    150 	MOVL	g(CX), BX
    151 	MOVL	g_m(BX), BX
    152 	MOVL	m_g0(BX), SI
    153 	CMPL	SI, AX	// if g == m->g0 call badmcall
    154 	JNE	3(PC)
    155 	MOVL	$runtimebadmcall(SB), AX
    156 	JMP	AX
    157 	MOVL	SI, g(CX)	// g = m->g0
    158 	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    159 	PUSHQ	AX
    160 	MOVL	DI, DX
    161 	MOVL	0(DI), DI
    162 	CALL	DI
    163 	POPQ	AX
    164 	MOVL	$runtimebadmcall2(SB), AX
    165 	JMP	AX
    166 	RET
    167 
    168 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    169 // of the G stack.  We need to distinguish the routine that
    170 // lives at the bottom of the G stack from the one that lives
    171 // at the top of the system stack because the one at the top of
    172 // the system stack terminates the stack walk (see topofstack()).
    173 TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
    174 	RET
    175 
    176 // func systemstack(fn func())
    177 TEXT runtimesystemstack(SB), NOSPLIT, $0-4
    178 	MOVL	fn+0(FP), DI	// DI = fn
    179 	get_tls(CX)
    180 	MOVL	g(CX), AX	// AX = g
    181 	MOVL	g_m(AX), BX	// BX = m
    182 
    183 	MOVL	m_gsignal(BX), DX	// DX = gsignal
    184 	CMPL	AX, DX
    185 	JEQ	noswitch
    186 
    187 	MOVL	m_g0(BX), DX	// DX = g0
    188 	CMPL	AX, DX
    189 	JEQ	noswitch
    190 
    191 	MOVL	m_curg(BX), R8
    192 	CMPL	AX, R8
    193 	JEQ	switch
    194 
    195 	// Not g0, not curg. Must be gsignal, but that's not allowed.
    196 	// Hide call from linker nosplit analysis.
    197 	MOVL	$runtimebadsystemstack(SB), AX
    198 	CALL	AX
    199 
    200 switch:
    201 	// save our state in g->sched.  Pretend to
    202 	// be systemstack_switch if the G stack is scanned.
    203 	MOVL	$runtimesystemstack_switch(SB), SI
    204 	MOVL	SI, (g_sched+gobuf_pc)(AX)
    205 	MOVL	SP, (g_sched+gobuf_sp)(AX)
    206 	MOVL	AX, (g_sched+gobuf_g)(AX)
    207 
    208 	// switch to g0
    209 	MOVL	DX, g(CX)
    210 	MOVL	(g_sched+gobuf_sp)(DX), SP
    211 
    212 	// call target function
    213 	MOVL	DI, DX
    214 	MOVL	0(DI), DI
    215 	CALL	DI
    216 
    217 	// switch back to g
    218 	get_tls(CX)
    219 	MOVL	g(CX), AX
    220 	MOVL	g_m(AX), BX
    221 	MOVL	m_curg(BX), AX
    222 	MOVL	AX, g(CX)
    223 	MOVL	(g_sched+gobuf_sp)(AX), SP
    224 	MOVL	$0, (g_sched+gobuf_sp)(AX)
    225 	RET
    226 
    227 noswitch:
    228 	// already on m stack, just call directly
    229 	MOVL	DI, DX
    230 	MOVL	0(DI), DI
    231 	CALL	DI
    232 	RET
    233 
    234 /*
    235  * support for morestack
    236  */
    237 
    238 // Called during function prolog when more stack is needed.
    239 //
    240 // The traceback routines see morestack on a g0 as being
    241 // the top of a stack (for example, morestack calling newstack
    242 // calling the scheduler calling newm calling gc), so we must
    243 // record an argument size. For that purpose, it has no arguments.
    244 TEXT runtimemorestack(SB),NOSPLIT,$0-0
    245 	get_tls(CX)
    246 	MOVL	g(CX), BX
    247 	MOVL	g_m(BX), BX
    248 
    249 	// Cannot grow scheduler stack (m->g0).
    250 	MOVL	m_g0(BX), SI
    251 	CMPL	g(CX), SI
    252 	JNE	2(PC)
    253 	MOVL	0, AX
    254 
    255 	// Cannot grow signal stack (m->gsignal).
    256 	MOVL	m_gsignal(BX), SI
    257 	CMPL	g(CX), SI
    258 	JNE	2(PC)
    259 	MOVL	0, AX
    260 
    261 	// Called from f.
    262 	// Set m->morebuf to f's caller.
    263 	MOVL	8(SP), AX	// f's caller's PC
    264 	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
    265 	LEAL	16(SP), AX	// f's caller's SP
    266 	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
    267 	get_tls(CX)
    268 	MOVL	g(CX), SI
    269 	MOVL	SI, (m_morebuf+gobuf_g)(BX)
    270 
    271 	// Set g->sched to context in f.
    272 	MOVL	0(SP), AX // f's PC
    273 	MOVL	AX, (g_sched+gobuf_pc)(SI)
    274 	MOVL	SI, (g_sched+gobuf_g)(SI)
    275 	LEAL	8(SP), AX // f's SP
    276 	MOVL	AX, (g_sched+gobuf_sp)(SI)
    277 	MOVL	DX, (g_sched+gobuf_ctxt)(SI)
    278 
    279 	// Call newstack on m->g0's stack.
    280 	MOVL	m_g0(BX), BX
    281 	MOVL	BX, g(CX)
    282 	MOVL	(g_sched+gobuf_sp)(BX), SP
    283 	CALL	runtimenewstack(SB)
    284 	MOVL	$0, 0x1003	// crash if newstack returns
    285 	RET
    286 
    287 // morestack trampolines
    288 TEXT runtimemorestack_noctxt(SB),NOSPLIT,$0
    289 	MOVL	$0, DX
    290 	JMP	runtimemorestack(SB)
    291 
    292 TEXT runtimestackBarrier(SB),NOSPLIT,$0
    293 	// We came here via a RET to an overwritten return PC.
    294 	// AX may be live. Other registers are available.
    295 
    296 	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    297 	get_tls(CX)
    298 	MOVL	g(CX), CX
    299 	MOVL	(g_stkbar+slice_array)(CX), DX
    300 	MOVL	g_stkbarPos(CX), BX
    301 	IMULL	$stkbar__size, BX	// Too big for SIB.
    302 	ADDL	DX, BX
    303 	MOVL	stkbar_savedLRVal(BX), BX
    304 	// Record that this stack barrier was hit.
    305 	ADDL	$1, g_stkbarPos(CX)
    306 	// Jump to the original return PC.
    307 	JMP	BX
    308 
    309 // reflectcall: call a function with the given argument list
    310 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    311 // we don't have variable-sized frames, so we use a small number
    312 // of constant-sized-frame functions to encode a few bits of size in the pc.
    313 // Caution: ugly multiline assembly macros in your future!
    314 
    315 #define DISPATCH(NAME,MAXSIZE)		\
    316 	CMPL	CX, $MAXSIZE;		\
    317 	JA	3(PC);			\
    318 	MOVL	$NAME(SB), AX;		\
    319 	JMP	AX
    320 // Note: can't just "JMP NAME(SB)" - bad inlining results.
    321 
    322 TEXT reflectcall(SB), NOSPLIT, $0-0
    323 	JMP	reflectcall(SB)
    324 
    325 TEXT reflectcall(SB), NOSPLIT, $0-20
    326 	MOVLQZX argsize+12(FP), CX
    327 	DISPATCH(runtimecall16, 16)
    328 	DISPATCH(runtimecall32, 32)
    329 	DISPATCH(runtimecall64, 64)
    330 	DISPATCH(runtimecall128, 128)
    331 	DISPATCH(runtimecall256, 256)
    332 	DISPATCH(runtimecall512, 512)
    333 	DISPATCH(runtimecall1024, 1024)
    334 	DISPATCH(runtimecall2048, 2048)
    335 	DISPATCH(runtimecall4096, 4096)
    336 	DISPATCH(runtimecall8192, 8192)
    337 	DISPATCH(runtimecall16384, 16384)
    338 	DISPATCH(runtimecall32768, 32768)
    339 	DISPATCH(runtimecall65536, 65536)
    340 	DISPATCH(runtimecall131072, 131072)
    341 	DISPATCH(runtimecall262144, 262144)
    342 	DISPATCH(runtimecall524288, 524288)
    343 	DISPATCH(runtimecall1048576, 1048576)
    344 	DISPATCH(runtimecall2097152, 2097152)
    345 	DISPATCH(runtimecall4194304, 4194304)
    346 	DISPATCH(runtimecall8388608, 8388608)
    347 	DISPATCH(runtimecall16777216, 16777216)
    348 	DISPATCH(runtimecall33554432, 33554432)
    349 	DISPATCH(runtimecall67108864, 67108864)
    350 	DISPATCH(runtimecall134217728, 134217728)
    351 	DISPATCH(runtimecall268435456, 268435456)
    352 	DISPATCH(runtimecall536870912, 536870912)
    353 	DISPATCH(runtimecall1073741824, 1073741824)
    354 	MOVL	$runtimebadreflectcall(SB), AX
    355 	JMP	AX
    356 
    357 #define CALLFN(NAME,MAXSIZE)			\
    358 TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
    359 	NO_LOCAL_POINTERS;			\
    360 	/* copy arguments to stack */		\
    361 	MOVL	argptr+8(FP), SI;		\
    362 	MOVL	argsize+12(FP), CX;		\
    363 	MOVL	SP, DI;				\
    364 	REP;MOVSB;				\
    365 	/* call function */			\
    366 	MOVL	f+4(FP), DX;			\
    367 	MOVL	(DX), AX;			\
    368 	CALL	AX;				\
    369 	/* copy return values back */		\
    370 	MOVL	argptr+8(FP), DI;		\
    371 	MOVL	argsize+12(FP), CX;		\
    372 	MOVL	retoffset+16(FP), BX;		\
    373 	MOVL	SP, SI;				\
    374 	ADDL	BX, DI;				\
    375 	ADDL	BX, SI;				\
    376 	SUBL	BX, CX;				\
    377 	REP;MOVSB;				\
    378 	/* execute write barrier updates */	\
    379 	MOVL	argtype+0(FP), DX;		\
    380 	MOVL	argptr+8(FP), DI;		\
    381 	MOVL	argsize+12(FP), CX;		\
    382 	MOVL	retoffset+16(FP), BX;		\
    383 	MOVL	DX, 0(SP);			\
    384 	MOVL	DI, 4(SP);			\
    385 	MOVL	CX, 8(SP);			\
    386 	MOVL	BX, 12(SP);			\
    387 	CALL	runtimecallwritebarrier(SB);	\
    388 	RET
    389 
    390 CALLFN(call16, 16)
    391 CALLFN(call32, 32)
    392 CALLFN(call64, 64)
    393 CALLFN(call128, 128)
    394 CALLFN(call256, 256)
    395 CALLFN(call512, 512)
    396 CALLFN(call1024, 1024)
    397 CALLFN(call2048, 2048)
    398 CALLFN(call4096, 4096)
    399 CALLFN(call8192, 8192)
    400 CALLFN(call16384, 16384)
    401 CALLFN(call32768, 32768)
    402 CALLFN(call65536, 65536)
    403 CALLFN(call131072, 131072)
    404 CALLFN(call262144, 262144)
    405 CALLFN(call524288, 524288)
    406 CALLFN(call1048576, 1048576)
    407 CALLFN(call2097152, 2097152)
    408 CALLFN(call4194304, 4194304)
    409 CALLFN(call8388608, 8388608)
    410 CALLFN(call16777216, 16777216)
    411 CALLFN(call33554432, 33554432)
    412 CALLFN(call67108864, 67108864)
    413 CALLFN(call134217728, 134217728)
    414 CALLFN(call268435456, 268435456)
    415 CALLFN(call536870912, 536870912)
    416 CALLFN(call1073741824, 1073741824)
    417 
    418 // bool cas(int32 *val, int32 old, int32 new)
    419 // Atomically:
    420 //	if(*val == old){
    421 //		*val = new;
    422 //		return 1;
    423 //	} else
    424 //		return 0;
    425 TEXT runtimecas(SB), NOSPLIT, $0-17
    426 	MOVL	ptr+0(FP), BX
    427 	MOVL	old+4(FP), AX
    428 	MOVL	new+8(FP), CX
    429 	LOCK
    430 	CMPXCHGL	CX, 0(BX)
    431 	SETEQ	ret+16(FP)
    432 	RET
    433 
    434 TEXT runtimecasuintptr(SB), NOSPLIT, $0-17
    435 	JMP	runtimecas(SB)
    436 
    437 TEXT runtimeatomicloaduintptr(SB), NOSPLIT, $0-12
    438 	JMP	runtimeatomicload(SB)
    439 
    440 TEXT runtimeatomicloaduint(SB), NOSPLIT, $0-12
    441 	JMP	runtimeatomicload(SB)
    442 
    443 TEXT runtimeatomicstoreuintptr(SB), NOSPLIT, $0-12
    444 	JMP	runtimeatomicstore(SB)
    445 
    446 // bool	runtimecas64(uint64 *val, uint64 old, uint64 new)
    447 // Atomically:
    448 //	if(*val == *old){
    449 //		*val = new;
    450 //		return 1;
    451 //	} else {
    452 //		return 0;
    453 //	}
    454 TEXT runtimecas64(SB), NOSPLIT, $0-25
    455 	MOVL	ptr+0(FP), BX
    456 	MOVQ	old+8(FP), AX
    457 	MOVQ	new+16(FP), CX
    458 	LOCK
    459 	CMPXCHGQ	CX, 0(BX)
    460 	SETEQ	ret+24(FP)
    461 	RET
    462 
    463 // bool casp(void **val, void *old, void *new)
    464 // Atomically:
    465 //	if(*val == old){
    466 //		*val = new;
    467 //		return 1;
    468 //	} else
    469 //		return 0;
    470 TEXT runtimecasp1(SB), NOSPLIT, $0-17
    471 	MOVL	ptr+0(FP), BX
    472 	MOVL	old+4(FP), AX
    473 	MOVL	new+8(FP), CX
    474 	LOCK
    475 	CMPXCHGL	CX, 0(BX)
    476 	SETEQ	ret+16(FP)
    477 	RET
    478 
    479 // uint32 xadd(uint32 volatile *val, int32 delta)
    480 // Atomically:
    481 //	*val += delta;
    482 //	return *val;
    483 TEXT runtimexadd(SB), NOSPLIT, $0-12
    484 	MOVL	ptr+0(FP), BX
    485 	MOVL	delta+4(FP), AX
    486 	MOVL	AX, CX
    487 	LOCK
    488 	XADDL	AX, 0(BX)
    489 	ADDL	CX, AX
    490 	MOVL	AX, ret+8(FP)
    491 	RET
    492 
    493 TEXT runtimexadd64(SB), NOSPLIT, $0-24
    494 	MOVL	ptr+0(FP), BX
    495 	MOVQ	delta+8(FP), AX
    496 	MOVQ	AX, CX
    497 	LOCK
    498 	XADDQ	AX, 0(BX)
    499 	ADDQ	CX, AX
    500 	MOVQ	AX, ret+16(FP)
    501 	RET
    502 
    503 TEXT runtimexadduintptr(SB), NOSPLIT, $0-12
    504 	JMP	runtimexadd(SB)
    505 
    506 TEXT runtimexchg(SB), NOSPLIT, $0-12
    507 	MOVL	ptr+0(FP), BX
    508 	MOVL	new+4(FP), AX
    509 	XCHGL	AX, 0(BX)
    510 	MOVL	AX, ret+8(FP)
    511 	RET
    512 
    513 TEXT runtimexchg64(SB), NOSPLIT, $0-24
    514 	MOVL	ptr+0(FP), BX
    515 	MOVQ	new+8(FP), AX
    516 	XCHGQ	AX, 0(BX)
    517 	MOVQ	AX, ret+16(FP)
    518 	RET
    519 
    520 TEXT runtimexchgp1(SB), NOSPLIT, $0-12
    521 	MOVL	ptr+0(FP), BX
    522 	MOVL	new+4(FP), AX
    523 	XCHGL	AX, 0(BX)
    524 	MOVL	AX, ret+8(FP)
    525 	RET
    526 
    527 TEXT runtimexchguintptr(SB), NOSPLIT, $0-12
    528 	JMP	runtimexchg(SB)
    529 
    530 TEXT runtimeprocyield(SB),NOSPLIT,$0-0
    531 	MOVL	cycles+0(FP), AX
    532 again:
    533 	PAUSE
    534 	SUBL	$1, AX
    535 	JNZ	again
    536 	RET
    537 
    538 TEXT runtimeatomicstorep1(SB), NOSPLIT, $0-8
    539 	MOVL	ptr+0(FP), BX
    540 	MOVL	val+4(FP), AX
    541 	XCHGL	AX, 0(BX)
    542 	RET
    543 
    544 TEXT runtimeatomicstore(SB), NOSPLIT, $0-8
    545 	MOVL	ptr+0(FP), BX
    546 	MOVL	val+4(FP), AX
    547 	XCHGL	AX, 0(BX)
    548 	RET
    549 
    550 TEXT runtimeatomicstore64(SB), NOSPLIT, $0-16
    551 	MOVL	ptr+0(FP), BX
    552 	MOVQ	val+8(FP), AX
    553 	XCHGQ	AX, 0(BX)
    554 	RET
    555 
    556 // void	runtimeatomicor8(byte volatile*, byte);
    557 TEXT runtimeatomicor8(SB), NOSPLIT, $0-5
    558 	MOVL	ptr+0(FP), BX
    559 	MOVB	val+4(FP), AX
    560 	LOCK
    561 	ORB	AX, 0(BX)
    562 	RET
    563 
    564 // void	runtimeatomicand8(byte volatile*, byte);
    565 TEXT runtimeatomicand8(SB), NOSPLIT, $0-5
    566 	MOVL	ptr+0(FP), BX
    567 	MOVB	val+4(FP), AX
    568 	LOCK
    569 	ANDB	AX, 0(BX)
    570 	RET
    571 
    572 TEXT publicationBarrier(SB),NOSPLIT,$0-0
    573 	// Stores are already ordered on x86, so this is just a
    574 	// compile barrier.
    575 	RET
    576 
    577 // void jmpdefer(fn, sp);
    578 // called from deferreturn.
    579 // 1. pop the caller
    580 // 2. sub 5 bytes from the callers return
    581 // 3. jmp to the argument
    582 TEXT runtimejmpdefer(SB), NOSPLIT, $0-8
    583 	MOVL	fv+0(FP), DX
    584 	MOVL	argp+4(FP), BX
    585 	LEAL	-8(BX), SP	// caller sp after CALL
    586 	SUBL	$5, (SP)	// return to CALL again
    587 	MOVL	0(DX), BX
    588 	JMP	BX	// but first run the deferred function
    589 
    590 // func asmcgocall(fn, arg unsafe.Pointer) int32
    591 // Not implemented.
    592 TEXT runtimeasmcgocall(SB),NOSPLIT,$0-12
    593 	MOVL	0, AX
    594 	RET
    595 
    596 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
    597 // Not implemented.
    598 TEXT runtimecgocallback(SB),NOSPLIT,$0-12
    599 	MOVL	0, AX
    600 	RET
    601 
    602 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
    603 // Not implemented.
    604 TEXT cgocallback_gofunc(SB),NOSPLIT,$0-12
    605 	MOVL	0, AX
    606 	RET
    607 
    608 // void setg(G*); set g. for use by needm.
    609 // Not implemented.
    610 TEXT runtimesetg(SB), NOSPLIT, $0-4
    611 	MOVL	0, AX
    612 	RET
    613 
    614 // check that SP is in range [g->stack.lo, g->stack.hi)
    615 TEXT runtimestackcheck(SB), NOSPLIT, $0-0
    616 	get_tls(CX)
    617 	MOVL	g(CX), AX
    618 	CMPL	(g_stack+stack_hi)(AX), SP
    619 	JHI	2(PC)
    620 	MOVL	0, AX
    621 	CMPL	SP, (g_stack+stack_lo)(AX)
    622 	JHI	2(PC)
    623 	MOVL	0, AX
    624 	RET
    625 
    626 TEXT runtimememclr(SB),NOSPLIT,$0-8
    627 	MOVL	ptr+0(FP), DI
    628 	MOVL	n+4(FP), CX
    629 	MOVQ	CX, BX
    630 	ANDQ	$7, BX
    631 	SHRQ	$3, CX
    632 	MOVQ	$0, AX
    633 	CLD
    634 	REP
    635 	STOSQ
    636 	MOVQ	BX, CX
    637 	REP
    638 	STOSB
    639 	RET
    640 
    641 TEXT runtimegetcallerpc(SB),NOSPLIT,$8-12
    642 	MOVL	argp+0(FP),AX		// addr of first arg
    643 	MOVL	-8(AX),AX		// get calling pc
    644 	CMPL	AX, runtimestackBarrierPC(SB)
    645 	JNE	nobar
    646 	// Get original return PC.
    647 	CALL	runtimenextBarrierPC(SB)
    648 	MOVL	0(SP), AX
    649 nobar:
    650 	MOVL	AX, ret+8(FP)
    651 	RET
    652 
    653 TEXT runtimesetcallerpc(SB),NOSPLIT,$8-8
    654 	MOVL	argp+0(FP),AX		// addr of first arg
    655 	MOVL	pc+4(FP), BX		// pc to set
    656 	MOVL	-8(AX), CX
    657 	CMPL	CX, runtimestackBarrierPC(SB)
    658 	JEQ	setbar
    659 	MOVQ	BX, -8(AX)		// set calling pc
    660 	RET
    661 setbar:
    662 	// Set the stack barrier return PC.
    663 	MOVL	BX, 0(SP)
    664 	CALL	runtimesetNextBarrierPC(SB)
    665 	RET
    666 
    667 TEXT runtimegetcallersp(SB),NOSPLIT,$0-12
    668 	MOVL	argp+0(FP), AX
    669 	MOVL	AX, ret+8(FP)
    670 	RET
    671 
    672 // int64 runtimecputicks(void)
    673 TEXT runtimecputicks(SB),NOSPLIT,$0-0
    674 	RDTSC
    675 	SHLQ	$32, DX
    676 	ADDQ	DX, AX
    677 	MOVQ	AX, ret+0(FP)
    678 	RET
    679 
    680 // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    681 // redirects to memhash(p, h, size) using the size
    682 // stored in the closure.
    683 TEXT runtimememhash_varlen(SB),NOSPLIT,$24-12
    684 	GO_ARGS
    685 	NO_LOCAL_POINTERS
    686 	MOVL	p+0(FP), AX
    687 	MOVL	h+4(FP), BX
    688 	MOVL	4(DX), CX
    689 	MOVL	AX, 0(SP)
    690 	MOVL	BX, 4(SP)
    691 	MOVL	CX, 8(SP)
    692 	CALL	runtimememhash(SB)
    693 	MOVL	16(SP), AX
    694 	MOVL	AX, ret+8(FP)
    695 	RET
    696 
    697 // hash function using AES hardware instructions
    698 // For now, our one amd64p32 system (NaCl) does not
    699 // support using AES instructions, so have not bothered to
    700 // write the implementations. Can copy and adjust the ones
    701 // in asm_amd64.s when the time comes.
    702 
    703 TEXT runtimeaeshash(SB),NOSPLIT,$0-20
    704 	MOVL	AX, ret+16(FP)
    705 	RET
    706 
    707 TEXT runtimeaeshashstr(SB),NOSPLIT,$0-20
    708 	MOVL	AX, ret+16(FP)
    709 	RET
    710 
    711 TEXT runtimeaeshash32(SB),NOSPLIT,$0-20
    712 	MOVL	AX, ret+16(FP)
    713 	RET
    714 
    715 TEXT runtimeaeshash64(SB),NOSPLIT,$0-20
    716 	MOVL	AX, ret+16(FP)
    717 	RET
    718 
    719 TEXT runtimememeq(SB),NOSPLIT,$0-17
    720 	MOVL	a+0(FP), SI
    721 	MOVL	b+4(FP), DI
    722 	MOVL	size+8(FP), BX
    723 	CALL	runtimememeqbody(SB)
    724 	MOVB	AX, ret+16(FP)
    725 	RET
    726 
    727 // memequal_varlen(a, b unsafe.Pointer) bool
    728 TEXT runtimememequal_varlen(SB),NOSPLIT,$0-9
    729 	MOVL    a+0(FP), SI
    730 	MOVL    b+4(FP), DI
    731 	CMPL    SI, DI
    732 	JEQ     eq
    733 	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
    734 	CALL    runtimememeqbody(SB)
    735 	MOVB    AX, ret+8(FP)
    736 	RET
    737 eq:
    738 	MOVB    $1, ret+8(FP)
    739 	RET
    740 
    741 // eqstring tests whether two strings are equal.
    742 // The compiler guarantees that strings passed
    743 // to eqstring have equal length.
    744 // See runtime_test.go:eqstring_generic for
    745 // equivalent Go code.
    746 TEXT runtimeeqstring(SB),NOSPLIT,$0-17
    747 	MOVL	s1str+0(FP), SI
    748 	MOVL	s2str+8(FP), DI
    749 	CMPL	SI, DI
    750 	JEQ	same
    751 	MOVL	s1len+4(FP), BX
    752 	CALL	runtimememeqbody(SB)
    753 	MOVB	AX, v+16(FP)
    754 	RET
    755 same:
    756 	MOVB	$1, v+16(FP)
    757 	RET
    758 
    759 // a in SI
    760 // b in DI
    761 // count in BX
    762 TEXT runtimememeqbody(SB),NOSPLIT,$0-0
    763 	XORQ	AX, AX
    764 
    765 	CMPQ	BX, $8
    766 	JB	small
    767 
    768 	// 64 bytes at a time using xmm registers
    769 hugeloop:
    770 	CMPQ	BX, $64
    771 	JB	bigloop
    772 	MOVOU	(SI), X0
    773 	MOVOU	(DI), X1
    774 	MOVOU	16(SI), X2
    775 	MOVOU	16(DI), X3
    776 	MOVOU	32(SI), X4
    777 	MOVOU	32(DI), X5
    778 	MOVOU	48(SI), X6
    779 	MOVOU	48(DI), X7
    780 	PCMPEQB	X1, X0
    781 	PCMPEQB	X3, X2
    782 	PCMPEQB	X5, X4
    783 	PCMPEQB	X7, X6
    784 	PAND	X2, X0
    785 	PAND	X6, X4
    786 	PAND	X4, X0
    787 	PMOVMSKB X0, DX
    788 	ADDQ	$64, SI
    789 	ADDQ	$64, DI
    790 	SUBQ	$64, BX
    791 	CMPL	DX, $0xffff
    792 	JEQ	hugeloop
    793 	RET
    794 
    795 	// 8 bytes at a time using 64-bit register
    796 bigloop:
    797 	CMPQ	BX, $8
    798 	JBE	leftover
    799 	MOVQ	(SI), CX
    800 	MOVQ	(DI), DX
    801 	ADDQ	$8, SI
    802 	ADDQ	$8, DI
    803 	SUBQ	$8, BX
    804 	CMPQ	CX, DX
    805 	JEQ	bigloop
    806 	RET
    807 
    808 	// remaining 0-8 bytes
    809 leftover:
    810 	ADDQ	BX, SI
    811 	ADDQ	BX, DI
    812 	MOVQ	-8(SI), CX
    813 	MOVQ	-8(DI), DX
    814 	CMPQ	CX, DX
    815 	SETEQ	AX
    816 	RET
    817 
    818 small:
    819 	CMPQ	BX, $0
    820 	JEQ	equal
    821 
    822 	LEAQ	0(BX*8), CX
    823 	NEGQ	CX
    824 
    825 	CMPB	SI, $0xf8
    826 	JA	si_high
    827 
    828 	// load at SI won't cross a page boundary.
    829 	MOVQ	(SI), SI
    830 	JMP	si_finish
    831 si_high:
    832 	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
    833 	MOVQ	BX, DX
    834 	ADDQ	SI, DX
    835 	MOVQ	-8(DX), SI
    836 	SHRQ	CX, SI
    837 si_finish:
    838 
    839 	// same for DI.
    840 	CMPB	DI, $0xf8
    841 	JA	di_high
    842 	MOVQ	(DI), DI
    843 	JMP	di_finish
    844 di_high:
    845 	MOVQ	BX, DX
    846 	ADDQ	DI, DX
    847 	MOVQ	-8(DX), DI
    848 	SHRQ	CX, DI
    849 di_finish:
    850 
    851 	SUBQ	SI, DI
    852 	SHLQ	CX, DI
    853 equal:
    854 	SETEQ	AX
    855 	RET
    856 
    857 TEXT runtimecmpstring(SB),NOSPLIT,$0-20
    858 	MOVL	s1_base+0(FP), SI
    859 	MOVL	s1_len+4(FP), BX
    860 	MOVL	s2_base+8(FP), DI
    861 	MOVL	s2_len+12(FP), DX
    862 	CALL	runtimecmpbody(SB)
    863 	MOVL	AX, ret+16(FP)
    864 	RET
    865 
    866 TEXT bytesCompare(SB),NOSPLIT,$0-28
    867 	MOVL	s1+0(FP), SI
    868 	MOVL	s1+4(FP), BX
    869 	MOVL	s2+12(FP), DI
    870 	MOVL	s2+16(FP), DX
    871 	CALL	runtimecmpbody(SB)
    872 	MOVL	AX, res+24(FP)
    873 	RET
    874 
    875 // input:
    876 //   SI = a
    877 //   DI = b
    878 //   BX = alen
    879 //   DX = blen
    880 // output:
    881 //   AX = 1/0/-1
    882 TEXT runtimecmpbody(SB),NOSPLIT,$0-0
    883 	CMPQ	SI, DI
    884 	JEQ	allsame
    885 	CMPQ	BX, DX
    886 	MOVQ	DX, R8
    887 	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
    888 	CMPQ	R8, $8
    889 	JB	small
    890 
    891 loop:
    892 	CMPQ	R8, $16
    893 	JBE	_0through16
    894 	MOVOU	(SI), X0
    895 	MOVOU	(DI), X1
    896 	PCMPEQB X0, X1
    897 	PMOVMSKB X1, AX
    898 	XORQ	$0xffff, AX	// convert EQ to NE
    899 	JNE	diff16	// branch if at least one byte is not equal
    900 	ADDQ	$16, SI
    901 	ADDQ	$16, DI
    902 	SUBQ	$16, R8
    903 	JMP	loop
    904 
    905 	// AX = bit mask of differences
    906 diff16:
    907 	BSFQ	AX, BX	// index of first byte that differs
    908 	XORQ	AX, AX
    909 	ADDQ	BX, SI
    910 	MOVB	(SI), CX
    911 	ADDQ	BX, DI
    912 	CMPB	CX, (DI)
    913 	SETHI	AX
    914 	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
    915 	RET
    916 
    917 	// 0 through 16 bytes left, alen>=8, blen>=8
    918 _0through16:
    919 	CMPQ	R8, $8
    920 	JBE	_0through8
    921 	MOVQ	(SI), AX
    922 	MOVQ	(DI), CX
    923 	CMPQ	AX, CX
    924 	JNE	diff8
    925 _0through8:
    926 	ADDQ	R8, SI
    927 	ADDQ	R8, DI
    928 	MOVQ	-8(SI), AX
    929 	MOVQ	-8(DI), CX
    930 	CMPQ	AX, CX
    931 	JEQ	allsame
    932 
    933 	// AX and CX contain parts of a and b that differ.
    934 diff8:
    935 	BSWAPQ	AX	// reverse order of bytes
    936 	BSWAPQ	CX
    937 	XORQ	AX, CX
    938 	BSRQ	CX, CX	// index of highest bit difference
    939 	SHRQ	CX, AX	// move a's bit to bottom
    940 	ANDQ	$1, AX	// mask bit
    941 	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
    942 	RET
    943 
    944 	// 0-7 bytes in common
    945 small:
    946 	LEAQ	(R8*8), CX	// bytes left -> bits left
    947 	NEGQ	CX		//  - bits lift (== 64 - bits left mod 64)
    948 	JEQ	allsame
    949 
    950 	// load bytes of a into high bytes of AX
    951 	CMPB	SI, $0xf8
    952 	JA	si_high
    953 	MOVQ	(SI), SI
    954 	JMP	si_finish
    955 si_high:
    956 	ADDQ	R8, SI
    957 	MOVQ	-8(SI), SI
    958 	SHRQ	CX, SI
    959 si_finish:
    960 	SHLQ	CX, SI
    961 
    962 	// load bytes of b in to high bytes of BX
    963 	CMPB	DI, $0xf8
    964 	JA	di_high
    965 	MOVQ	(DI), DI
    966 	JMP	di_finish
    967 di_high:
    968 	ADDQ	R8, DI
    969 	MOVQ	-8(DI), DI
    970 	SHRQ	CX, DI
    971 di_finish:
    972 	SHLQ	CX, DI
    973 
    974 	BSWAPQ	SI	// reverse order of bytes
    975 	BSWAPQ	DI
    976 	XORQ	SI, DI	// find bit differences
    977 	JEQ	allsame
    978 	BSRQ	DI, CX	// index of highest bit difference
    979 	SHRQ	CX, SI	// move a's bit to bottom
    980 	ANDQ	$1, SI	// mask bit
    981 	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
    982 	RET
    983 
    984 allsame:
    985 	XORQ	AX, AX
    986 	XORQ	CX, CX
    987 	CMPQ	BX, DX
    988 	SETGT	AX	// 1 if alen > blen
    989 	SETEQ	CX	// 1 if alen == blen
    990 	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
    991 	RET
    992 
    993 TEXT bytesIndexByte(SB),NOSPLIT,$0-20
    994 	MOVL s+0(FP), SI
    995 	MOVL s_len+4(FP), BX
    996 	MOVB c+12(FP), AL
    997 	CALL runtimeindexbytebody(SB)
    998 	MOVL AX, ret+16(FP)
    999 	RET
   1000 
   1001 TEXT stringsIndexByte(SB),NOSPLIT,$0-20
   1002 	MOVL s+0(FP), SI
   1003 	MOVL s_len+4(FP), BX
   1004 	MOVB c+8(FP), AL
   1005 	CALL runtimeindexbytebody(SB)
   1006 	MOVL AX, ret+16(FP)
   1007 	RET
   1008 
   1009 // input:
   1010 //   SI: data
   1011 //   BX: data len
   1012 //   AL: byte sought
   1013 // output:
   1014 //   AX
   1015 TEXT runtimeindexbytebody(SB),NOSPLIT,$0
   1016 	MOVL SI, DI
   1017 
   1018 	CMPL BX, $16
   1019 	JLT small
   1020 
   1021 	// round up to first 16-byte boundary
   1022 	TESTL $15, SI
   1023 	JZ aligned
   1024 	MOVL SI, CX
   1025 	ANDL $~15, CX
   1026 	ADDL $16, CX
   1027 
   1028 	// search the beginning
   1029 	SUBL SI, CX
   1030 	REPN; SCASB
   1031 	JZ success
   1032 
   1033 // DI is 16-byte aligned; get ready to search using SSE instructions
   1034 aligned:
   1035 	// round down to last 16-byte boundary
   1036 	MOVL BX, R11
   1037 	ADDL SI, R11
   1038 	ANDL $~15, R11
   1039 
   1040 	// shuffle X0 around so that each byte contains c
   1041 	MOVD AX, X0
   1042 	PUNPCKLBW X0, X0
   1043 	PUNPCKLBW X0, X0
   1044 	PSHUFL $0, X0, X0
   1045 	JMP condition
   1046 
   1047 sse:
   1048 	// move the next 16-byte chunk of the buffer into X1
   1049 	MOVO (DI), X1
   1050 	// compare bytes in X0 to X1
   1051 	PCMPEQB X0, X1
   1052 	// take the top bit of each byte in X1 and put the result in DX
   1053 	PMOVMSKB X1, DX
   1054 	TESTL DX, DX
   1055 	JNZ ssesuccess
   1056 	ADDL $16, DI
   1057 
   1058 condition:
   1059 	CMPL DI, R11
   1060 	JLT sse
   1061 
   1062 	// search the end
   1063 	MOVL SI, CX
   1064 	ADDL BX, CX
   1065 	SUBL R11, CX
   1066 	// if CX == 0, the zero flag will be set and we'll end up
   1067 	// returning a false success
   1068 	JZ failure
   1069 	REPN; SCASB
   1070 	JZ success
   1071 
   1072 failure:
   1073 	MOVL $-1, AX
   1074 	RET
   1075 
   1076 // handle for lengths < 16
   1077 small:
   1078 	MOVL BX, CX
   1079 	REPN; SCASB
   1080 	JZ success
   1081 	MOVL $-1, AX
   1082 	RET
   1083 
   1084 // we've found the chunk containing the byte
   1085 // now just figure out which specific byte it is
   1086 ssesuccess:
   1087 	// get the index of the least significant set bit
   1088 	BSFW DX, DX
   1089 	SUBL SI, DI
   1090 	ADDL DI, DX
   1091 	MOVL DX, AX
   1092 	RET
   1093 
   1094 success:
   1095 	SUBL SI, DI
   1096 	SUBL $1, DI
   1097 	MOVL DI, AX
   1098 	RET
   1099 
   1100 TEXT bytesEqual(SB),NOSPLIT,$0-25
   1101 	MOVL	a_len+4(FP), BX
   1102 	MOVL	b_len+16(FP), CX
   1103 	XORL	AX, AX
   1104 	CMPL	BX, CX
   1105 	JNE	eqret
   1106 	MOVL	a+0(FP), SI
   1107 	MOVL	b+12(FP), DI
   1108 	CALL	runtimememeqbody(SB)
   1109 eqret:
   1110 	MOVB	AX, ret+24(FP)
   1111 	RET
   1112 
   1113 TEXT runtimefastrand1(SB), NOSPLIT, $0-4
   1114 	get_tls(CX)
   1115 	MOVL	g(CX), AX
   1116 	MOVL	g_m(AX), AX
   1117 	MOVL	m_fastrand(AX), DX
   1118 	ADDL	DX, DX
   1119 	MOVL	DX, BX
   1120 	XORL	$0x88888eef, DX
   1121 	CMOVLMI	BX, DX
   1122 	MOVL	DX, m_fastrand(AX)
   1123 	MOVL	DX, ret+0(FP)
   1124 	RET
   1125 
   1126 TEXT runtimereturn0(SB), NOSPLIT, $0
   1127 	MOVL	$0, AX
   1128 	RET
   1129 
   1130 // The top-most function running on a goroutine
   1131 // returns to goexit+PCQuantum.
   1132 TEXT runtimegoexit(SB),NOSPLIT,$0-0
   1133 	BYTE	$0x90	// NOP
   1134 	CALL	runtimegoexit1(SB)	// does not return
   1135 	// traceback from goexit1 must hit code range of goexit
   1136 	BYTE	$0x90	// NOP
   1137 
   1138 TEXT runtimeprefetcht0(SB),NOSPLIT,$0-4
   1139 	MOVL	addr+0(FP), AX
   1140 	PREFETCHT0	(AX)
   1141 	RET
   1142 
   1143 TEXT runtimeprefetcht1(SB),NOSPLIT,$0-4
   1144 	MOVL	addr+0(FP), AX
   1145 	PREFETCHT1	(AX)
   1146 	RET
   1147 
   1148 
   1149 TEXT runtimeprefetcht2(SB),NOSPLIT,$0-4
   1150 	MOVL	addr+0(FP), AX
   1151 	PREFETCHT2	(AX)
   1152 	RET
   1153 
   1154 TEXT runtimeprefetchnta(SB),NOSPLIT,$0-4
   1155 	MOVL	addr+0(FP), AX
   1156 	PREFETCHNTA	(AX)
   1157 	RET
   1158