Home | History | Annotate | Download | only in runtime
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "go_asm.h"
      6 #include "go_tls.h"
      7 #include "tls_arm64.h"
      8 #include "funcdata.h"
      9 #include "textflag.h"
     10 
     11 TEXT runtimert0_go(SB),NOSPLIT,$0
        	// Runtime entry point. Builds g0 on the OS-provided stack, calls
        	// _cgo_init when it is present, links g0 and m0, runs
        	// check/args/osinit/schedinit, queues the function recorded in
        	// mainPC through newproc and finally enters mstart.
     12 	// SP = stack; R0 = argc; R1 = argv
     13 
     14 	// initialize essential registers
     15 	BL	runtimereginit(SB)
     16 
     17 	SUB	$32, RSP		// reserve 32 bytes for the saved arguments
     18 	MOVW	R0, 8(RSP) // argc
     19 	MOVD	R1, 16(RSP) // argv
     20 
     21 	// create istack out of the given (operating system) stack.
     22 	// _cgo_init may update stackguard.
     23 	MOVD	$runtimeg0(SB), g
     24 	MOVD RSP, R7
     25 	MOVD	$(-64*1024)(R7), R0	// R0 = RSP - 64KB: provisional stack.lo and guards
     26 	MOVD	R0, g_stackguard0(g)
     27 	MOVD	R0, g_stackguard1(g)
     28 	MOVD	R0, (g_stack+stack_lo)(g)
     29 	MOVD	R7, (g_stack+stack_hi)(g)
     30 
     31 	// if there is a _cgo_init, call it using the gcc ABI.
     32 	MOVD	_cgo_init(SB), R12
     33 	CMP	$0, R12
     34 	BEQ	nocgo
     35 
     36 	MRS_TPIDR_R0			// load TLS base pointer
     37 	MOVD	R0, R3			// arg 3: TLS base pointer
     38 #ifdef TLSG_IS_VARIABLE
     39 	MOVD	$runtimetls_g(SB), R2 	// arg 2: tlsg
     40 #else
     41 	MOVD	$0x10, R2		// arg 2: tlsg TODO(minux): hardcoded for linux
     42 #endif
     43 	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
     44 	MOVD	g, R0			// arg 0: G
     45 	BL	(R12)
        	// NOTE(review): nocgo is the very next label, so this reload,
        	// compare and branch reach it on both outcomes; it looks
        	// redundant. Confirm before removing.
     46 	MOVD	_cgo_init(SB), R12
     47 	CMP	$0, R12
     48 	BEQ	nocgo
     49 
     50 nocgo:
     51 	// update stackguard after _cgo_init
     52 	MOVD	(g_stack+stack_lo)(g), R0
     53 	ADD	$const__StackGuard, R0
     54 	MOVD	R0, g_stackguard0(g)
     55 	MOVD	R0, g_stackguard1(g)
     56 
     57 	// set the per-goroutine and per-mach "registers"
     58 	MOVD	$runtimem0(SB), R0
     59 
     60 	// save m->g0 = g0
     61 	MOVD	g, m_g0(R0)
     62 	// save m0 to g0->m
     63 	MOVD	R0, g_m(g)
     64 
     65 	BL	runtimecheck(SB)
     66 
        	// NOTE(review): argc is copied to -8(RSP), which lies below the
        	// stack pointer, and argv to 0(RSP); the values saved on entry are
        	// still at 8(RSP)/16(RSP). Confirm which slots runtimeargs reads.
     67 	MOVW	8(RSP), R0	// copy argc
     68 	MOVW	R0, -8(RSP)
     69 	MOVD	16(RSP), R0		// copy argv
     70 	MOVD	R0, 0(RSP)
     71 	BL	runtimeargs(SB)
     72 	BL	runtimeosinit(SB)
     73 	BL	runtimeschedinit(SB)
     74 
     75 	// create a new goroutine to start program
     76 	MOVD	$runtimemainPC(SB), R0		// entry
     77 	MOVD	RSP, R7
        	// Four pre-decrement stores push, from high to low address:
        	// 0, &mainPC, 0, 0. Once R7 is copied back, 8(RSP) holds 0 and
        	// 16(RSP) holds &mainPC (presumably newproc's size and fn
        	// arguments; confirm against newproc's signature).
     78 	MOVD.W	$0, -8(R7)
     79 	MOVD.W	R0, -8(R7)
     80 	MOVD.W	$0, -8(R7)
     81 	MOVD.W	$0, -8(R7)
     82 	MOVD	R7, RSP
     83 	BL	runtimenewproc(SB)
     84 	ADD	$32, RSP		// pop the four words pushed above
     85 
     86 	// start this M
     87 	BL	runtimemstart(SB)
     88 
        	// Reached only if mstart returns: store through a nil pointer to crash.
     89 	MOVD	$0, R0
     90 	MOVD	R0, (R0)	// boom
     91 	UNDEF
     92 
     93 DATA	runtimemainPC+0(SB)/8,$runtimemain(SB)	// 8-byte word holding the address of runtimemain
     94 GLOBL	runtimemainPC(SB),RODATA,$8		// read-only; rt0_go hands its address to newproc
     95 
     96 TEXT runtimebreakpoint(SB),NOSPLIT,$-8-0
     97 	BRK			// raise a breakpoint exception
     98 	RET
     99 
    100 TEXT runtimeasminit(SB),NOSPLIT,$-8-0
    101 	RET			// no-op here: returns immediately
    102 
    103 TEXT runtimereginit(SB),NOSPLIT,$-8-0
    104 	// initialize essential FP registers
        	// F27..F31 are loaded with fixed constants (presumably reserved by
        	// the toolchain as constant registers; confirm against the compiler).
    105 	FMOVD	$4503601774854144.0, F27	// 2^52 + 2^31
    106 	FMOVD	$0.5, F29
    107 	FSUBD	F29, F29, F28			// F28 = 0.5 - 0.5 = 0.0
    108 	FADDD	F29, F29, F30			// F30 = 0.5 + 0.5 = 1.0
    109 	FADDD	F30, F30, F31			// F31 = 1.0 + 1.0 = 2.0
    110 	RET
    111 
    112 /*
    113  *  go-routine
    114  */
    115 
    116 // void gosave(Gobuf*)
    117 // save state in Gobuf; setjmp
        // Records the current sp, the return address (LR) as pc, and g;
        // the lr, ret and ctxt fields are cleared.
    118 TEXT runtimegosave(SB), NOSPLIT, $-8-8
    119 	MOVD	buf+0(FP), R3
    120 	MOVD	RSP, R0			// RSP is staged through R0 before being stored
    121 	MOVD	R0, gobuf_sp(R3)
    122 	MOVD	LR, gobuf_pc(R3)	// resume point = our return address
    123 	MOVD	g, gobuf_g(R3)
    124 	MOVD	ZR, gobuf_lr(R3)
    125 	MOVD	ZR, gobuf_ret(R3)
    126 	MOVD	ZR, gobuf_ctxt(R3)
    127 	RET
    128 
    129 // void gogo(Gobuf*)
    130 // restore state from Gobuf; longjmp
        // Installs buf.g as the current g, reloads sp, lr, ret (into R0) and
        // ctxt (into R26), zeroes those four fields in the Gobuf, and jumps
        // to buf.pc. Does not return to its caller.
    131 TEXT runtimegogo(SB), NOSPLIT, $-8-8
    132 	MOVD	buf+0(FP), R5
    133 	MOVD	gobuf_g(R5), g
    134 	BL	runtimesave_g(SB)
    135 
    136 	MOVD	0(g), R4	// make sure g is not nil
    137 	MOVD	gobuf_sp(R5), R0
    138 	MOVD	R0, RSP
    139 	MOVD	gobuf_lr(R5), LR
    140 	MOVD	gobuf_ret(R5), R0
    141 	MOVD	gobuf_ctxt(R5), R26
        	// The values are live in registers now; clear them in the Gobuf.
    142 	MOVD	$0, gobuf_sp(R5)
    143 	MOVD	$0, gobuf_ret(R5)
    144 	MOVD	$0, gobuf_lr(R5)
    145 	MOVD	$0, gobuf_ctxt(R5)
    146 	CMP	ZR, ZR // set condition codes for == test, needed by stack split
    147 	MOVD	gobuf_pc(R5), R6
    148 	B	(R6)
    149 
    150 // void mcall(fn func(*g))
    151 // Switch to m->g0's stack, call fn(g).
    152 // Fn must never return.  It should gogo(&g->sched)
    153 // to keep running g.
        // Calling mcall while already on g0 jumps to badmcall; if fn does
        // return, control reaches badmcall2.
    154 TEXT runtimemcall(SB), NOSPLIT, $-8-8
    155 	// Save caller state in g->sched
    156 	MOVD	RSP, R0
    157 	MOVD	R0, (g_sched+gobuf_sp)(g)
    158 	MOVD	LR, (g_sched+gobuf_pc)(g)
    159 	MOVD	$0, (g_sched+gobuf_lr)(g)
    160 	MOVD	g, (g_sched+gobuf_g)(g)
    161 
    162 	// Switch to m->g0 & its stack, call fn.
    163 	MOVD	g, R3			// R3 = calling g
    164 	MOVD	g_m(g), R8
    165 	MOVD	m_g0(R8), g
    166 	BL	runtimesave_g(SB)
    167 	CMP	g, R3
    168 	BNE	2(PC)			// caller was not g0: skip the badmcall jump
    169 	B	runtimebadmcall(SB)
    170 	MOVD	fn+0(FP), R26			// context
    171 	MOVD	0(R26), R4			// code pointer
    172 	MOVD	(g_sched+gobuf_sp)(g), R0
    173 	MOVD	R0, RSP	// sp = m->g0->sched.sp
        	// After the SUB below, 8(RSP) holds the calling g (fn's argument)
        	// and 0(RSP) holds 0.
    174 	MOVD	R3, -8(RSP)
    175 	MOVD	$0, -16(RSP)
    176 	SUB	$16, RSP
    177 	BL	(R4)
    178 	B	runtimebadmcall2(SB)
    179 
    180 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    181 // of the G stack.  We need to distinguish the routine that
    182 // lives at the bottom of the G stack from the one that lives
    183 // at the top of the system stack because the one at the top of
    184 // the system stack terminates the stack walk (see topofstack()).
        // It is never meant to execute: its first instruction is UNDEF.
        // systemstack only records an address inside it as a saved pc.
    185 TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
    186 	UNDEF
    187 	BL	(LR)	// make sure this function is not leaf
    188 	RET
    189 
    190 // func systemstack(fn func())
        // Runs fn on the m's g0 stack. When the current g is already gsignal or
        // g0, fn is called directly on the current stack. When the current g is
        // m->curg, the g's state is saved in g->sched, the stack is switched to
        // g0 for the call, and switched back afterwards. Any other g is
        // reported through badsystemstack.
    191 TEXT runtimesystemstack(SB), NOSPLIT, $0-8
    192 	MOVD	fn+0(FP), R3	// R3 = fn
    193 	MOVD	R3, R26		// context
    194 	MOVD	g_m(g), R4	// R4 = m
    195 
    196 	MOVD	m_gsignal(R4), R5	// R5 = gsignal
    197 	CMP	g, R5
    198 	BEQ	noswitch
    199 
    200 	MOVD	m_g0(R4), R5	// R5 = g0
    201 	CMP	g, R5
    202 	BEQ	noswitch
    203 
    204 	MOVD	m_curg(R4), R6
    205 	CMP	g, R6
    206 	BEQ	switch
    207 
    208 	// Bad: g is not gsignal, not g0, not curg. What is it?
    209 	// Hide call from linker nosplit analysis.
    210 	MOVD	$runtimebadsystemstack(SB), R3
    211 	BL	(R3)
    212 
    213 switch:
    214 	// save our state in g->sched.  Pretend to
    215 	// be systemstack_switch if the G stack is scanned.
    216 	MOVD	$runtimesystemstack_switch(SB), R6
    217 	ADD	$8, R6	// get past prologue
    218 	MOVD	R6, (g_sched+gobuf_pc)(g)
    219 	MOVD	RSP, R0
    220 	MOVD	R0, (g_sched+gobuf_sp)(g)
    221 	MOVD	$0, (g_sched+gobuf_lr)(g)
    222 	MOVD	g, (g_sched+gobuf_g)(g)
    223 
    224 	// switch to g0
    225 	MOVD	R5, g			// R5 still holds m->g0 from the check above
    226 	BL	runtimesave_g(SB)
    227 	MOVD	(g_sched+gobuf_sp)(g), R3
    228 	// make it look like mstart called systemstack on g0, to stop traceback
    229 	SUB	$16, R3
    230 	AND	$~15, R3		// round down to a 16-byte boundary
    231 	MOVD	$runtimemstart(SB), R4
    232 	MOVD	R4, 0(R3)
    233 	MOVD	R3, RSP
    234 
    235 	// call target function
    236 	MOVD	0(R26), R3	// code pointer
    237 	BL	(R3)
    238 
    239 	// switch back to g
    240 	MOVD	g_m(g), R3
    241 	MOVD	m_curg(R3), g
    242 	BL	runtimesave_g(SB)
    243 	MOVD	(g_sched+gobuf_sp)(g), R0
    244 	MOVD	R0, RSP
    245 	MOVD	$0, (g_sched+gobuf_sp)(g)
    246 	RET
    247 
    248 noswitch:
    249 	// already on m stack, just call directly
    250 	MOVD	0(R26), R3	// code pointer
    251 	BL	(R3)
    252 	RET
    253 
    254 /*
    255  * support for morestack
    256  */
    257 
    258 // Called during function prolog when more stack is needed.
    259 // Caller has already loaded:
    260 // R3 prolog's LR (R30)
    261 //
    262 // The traceback routines see morestack on a g0 as being
    263 // the top of a stack (for example, morestack calling newstack
    264 // calling the scheduler calling newm calling gc), so we must
    265 // record an argument size. For that purpose, it has no arguments.
        //
        // Aborts when running on g0 or gsignal. Otherwise it records the
        // interrupted function's state in g->sched and its caller's state in
        // m->morebuf, switches to g0's stack and calls newstack.
    266 TEXT runtimemorestack(SB),NOSPLIT,$-8-0
    267 	// Cannot grow scheduler stack (m->g0).
    268 	MOVD	g_m(g), R8
    269 	MOVD	m_g0(R8), R4
    270 	CMP	g, R4
    271 	BNE	2(PC)			// not g0: skip the abort
    272 	B	runtimeabort(SB)
    273 
    274 	// Cannot grow signal stack (m->gsignal).
    275 	MOVD	m_gsignal(R8), R4
    276 	CMP	g, R4
    277 	BNE	2(PC)			// not gsignal: skip the abort
    278 	B	runtimeabort(SB)
    279 
    280 	// Called from f.
    281 	// Set g->sched to context in f
    282 	MOVD	R26, (g_sched+gobuf_ctxt)(g)
    283 	MOVD	RSP, R0
    284 	MOVD	R0, (g_sched+gobuf_sp)(g)
    285 	MOVD	LR, (g_sched+gobuf_pc)(g)
    286 	MOVD	R3, (g_sched+gobuf_lr)(g)
    287 
    288 	// Called from f.
    289 	// Set m->morebuf to f's callers.
    290 	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
    291 	MOVD	RSP, R0
    292 	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
    293 	MOVD	g, (m_morebuf+gobuf_g)(R8)
    294 
    295 	// Call newstack on m->g0's stack.
    296 	MOVD	m_g0(R8), g
    297 	BL	runtimesave_g(SB)
    298 	MOVD	(g_sched+gobuf_sp)(g), R0
    299 	MOVD	R0, RSP
    300 	BL	runtimenewstack(SB)
    301 
    302 	// Not reached, but make sure the return PC from the call to newstack
    303 	// is still in this function, and not the beginning of the next.
    304 	UNDEF
    305 
    306 TEXT runtimemorestack_noctxt(SB),NOSPLIT,$-4-0
        	// Variant of morestack for callers without a context value: zero
        	// R26 (the context register) and continue in morestack.
        	// NOTE(review): the frame size here is $-4 while the rest of the
        	// file uses $-8; confirm whether the difference matters.
    307 	MOVW	$0, R26
    308 	B runtimemorestack(SB)
    309 
    310 TEXT runtimestackBarrier(SB),NOSPLIT,$0
    311 	// We came here via a RET to an overwritten LR.
    312 	// R0 may be live (see return0). Other registers are available.
    313 
    314 	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    315 	MOVD	(g_stkbar+slice_array)(g), R4	// R4 = base of g.stkbar
    316 	MOVD	g_stkbarPos(g), R5		// R5 = index of the current barrier
    317 	MOVD	$stkbar__size, R6
    318 	MUL	R5, R6				// R6 = index * element size
    319 	ADD	R4, R6				// R6 = &g.stkbar[g.stkbarPos]
    320 	MOVD	stkbar_savedLRVal(R6), R6
    321 	// Record that this stack barrier was hit.
    322 	ADD	$1, R5
    323 	MOVD	R5, g_stkbarPos(g)
    324 	// Jump to the original return PC.
    325 	B	(R6)
    326 
    327 // reflectcall: call a function with the given argument list
    328 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    329 // we don't have variable-sized frames, so we use a small number
    330 // of constant-sized-frame functions to encode a few bits of size in the pc.
    331 // Caution: ugly multiline assembly macros in your future!
    332 
    333 #define DISPATCH(NAME,MAXSIZE)		\
    334 	MOVD	$MAXSIZE, R27;		\
    335 	CMP	R27, R16;		\
    336 	BGT	3(PC);			\
    337 	MOVD	$NAME(SB), R27;	\
    338 	B	(R27)
    339 // Note: can't just "B NAME(SB)" - bad inlining results.
        // DISPATCH expects the requested size in R16. When R16 > MAXSIZE the
        // BGT skips the following two instructions, so execution continues with
        // whatever comes after the macro; otherwise control jumps to NAME.
        // R27 is used as scratch.
    340 
    341 TEXT reflectcall(SB), NOSPLIT, $0-0
        	// Trampoline: forwards to the dispatcher defined just below.
        	// NOTE(review): as written, this symbol and its branch target are
        	// spelled identically to the next TEXT; a package-qualifying
        	// separator appears to have been lost from the names. Confirm.
    342 	B	reflectcall(SB)
    343 
    344 TEXT reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize into R16 and walks the DISPATCH chain:
        	// the first size class with argsize <= MAXSIZE receives control.
        	// Sizes above the largest class (1073741824) reach badreflectcall.
    345 	MOVWU argsize+24(FP), R16
    346 	// NOTE(rsc): No call16, because CALLFN needs four words
    347 	// of argument space to invoke callwritebarrier.
    348 	DISPATCH(runtimecall32, 32)
    349 	DISPATCH(runtimecall64, 64)
    350 	DISPATCH(runtimecall128, 128)
    351 	DISPATCH(runtimecall256, 256)
    352 	DISPATCH(runtimecall512, 512)
    353 	DISPATCH(runtimecall1024, 1024)
    354 	DISPATCH(runtimecall2048, 2048)
    355 	DISPATCH(runtimecall4096, 4096)
    356 	DISPATCH(runtimecall8192, 8192)
    357 	DISPATCH(runtimecall16384, 16384)
    358 	DISPATCH(runtimecall32768, 32768)
    359 	DISPATCH(runtimecall65536, 65536)
    360 	DISPATCH(runtimecall131072, 131072)
    361 	DISPATCH(runtimecall262144, 262144)
    362 	DISPATCH(runtimecall524288, 524288)
    363 	DISPATCH(runtimecall1048576, 1048576)
    364 	DISPATCH(runtimecall2097152, 2097152)
    365 	DISPATCH(runtimecall4194304, 4194304)
    366 	DISPATCH(runtimecall8388608, 8388608)
    367 	DISPATCH(runtimecall16777216, 16777216)
    368 	DISPATCH(runtimecall33554432, 33554432)
    369 	DISPATCH(runtimecall67108864, 67108864)
    370 	DISPATCH(runtimecall134217728, 134217728)
    371 	DISPATCH(runtimecall268435456, 268435456)
    372 	DISPATCH(runtimecall536870912, 536870912)
    373 	DISPATCH(runtimecall1073741824, 1073741824)
    374 	MOVD	$runtimebadreflectcall(SB), R0
    375 	B	(R0)
    376 
    377 #define CALLFN(NAME,MAXSIZE)			\
    378 TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
    379 	NO_LOCAL_POINTERS;			\
    380 	/* copy argsize bytes from arg to 8(RSP) upward, byte by byte */		\
    381 	MOVD	arg+16(FP), R3;			\
    382 	MOVWU	argsize+24(FP), R4;			\
    383 	MOVD	RSP, R5;				\
    384 	ADD	$(8-1), R5;			\
    385 	SUB	$1, R3;				\
    386 	ADD	R5, R4;				\
    387 	CMP	R5, R4;				\
    388 	BEQ	4(PC);				\
    389 	MOVBU.W	1(R3), R6;			\
    390 	MOVBU.W	R6, 1(R5);			\
    391 	B	-4(PC);				\
    392 	/* call function: R26 = closure, R0 = its code pointer */			\
    393 	MOVD	f+8(FP), R26;			\
    394 	MOVD	(R26), R0;			\
    395 	PCDATA  $PCDATA_StackMapIndex, $0;	\
    396 	BL	(R0);				\
    397 	/* copy return values back: bytes retoffset..n of the frame */		\
    398 	MOVD	arg+16(FP), R3;			\
    399 	MOVWU	n+24(FP), R4;			\
    400 	MOVWU	retoffset+28(FP), R6;		\
    401 	MOVD	RSP, R5;				\
    402 	ADD	R6, R5; 			\
    403 	ADD	R6, R3;				\
    404 	SUB	R6, R4;				\
    405 	ADD	$(8-1), R5;			\
    406 	SUB	$1, R3;				\
    407 	ADD	R5, R4;				\
    408 loop:						\
    409 	CMP	R5, R4;				\
    410 	BEQ	end;				\
    411 	MOVBU.W	1(R5), R6;			\
    412 	MOVBU.W	R6, 1(R3);			\
    413 	B	loop;				\
    414 end:						\
    415 	/* execute write barrier updates: callwritebarrier(argtype, arg, n, retoffset) */	\
    416 	MOVD	argtype+0(FP), R7;		\
    417 	MOVD	arg+16(FP), R3;			\
    418 	MOVWU	n+24(FP), R4;			\
    419 	MOVWU	retoffset+28(FP), R6;		\
    420 	MOVD	R7, 8(RSP);			\
    421 	MOVD	R3, 16(RSP);			\
    422 	MOVD	R4, 24(RSP);			\
    423 	MOVD	R6, 32(RSP);			\
    424 	BL	runtimecallwritebarrier(SB);	\
    425 	RET
        // CALLFN defines one fixed-frame call helper. Both copy loops keep
        // their pointers one byte behind the next element and use MOVBU.W,
        // which advances the pointer by one before each access. In the first
        // loop R5 starts at RSP+7 and R4 marks the end (RSP+7+argsize), so the
        // first byte lands at 8(RSP). "BEQ 4(PC)" leaves that loop and
        // "B -4(PC)" returns to its CMP. The argument size is read through two
        // names, argsize+24(FP) and n+24(FP); both refer to the same slot.
    426 
    427 // These have 8 added to make the overall frame size a multiple of 16,
    428 // as required by the ABI. (There is another +8 for the saved LR.)
        // Each line instantiates CALLFN for one size class: the second argument
        // is the class size plus 8. call16 is instantiated here although the
        // dispatch chain in reflectcall starts at call32.
    429 CALLFN(call16, 24 )
    430 CALLFN(call32, 40 )
    431 CALLFN(call64, 72 )
    432 CALLFN(call128, 136 )
    433 CALLFN(call256, 264 )
    434 CALLFN(call512, 520 )
    435 CALLFN(call1024, 1032 )
    436 CALLFN(call2048, 2056 )
    437 CALLFN(call4096, 4104 )
    438 CALLFN(call8192, 8200 )
    439 CALLFN(call16384, 16392 )
    440 CALLFN(call32768, 32776 )
    441 CALLFN(call65536, 65544 )
    442 CALLFN(call131072, 131080 )
    443 CALLFN(call262144, 262152 )
    444 CALLFN(call524288, 524296 )
    445 CALLFN(call1048576, 1048584 )
    446 CALLFN(call2097152, 2097160 )
    447 CALLFN(call4194304, 4194312 )
    448 CALLFN(call8388608, 8388616 )
    449 CALLFN(call16777216, 16777224 )
    450 CALLFN(call33554432, 33554440 )
    451 CALLFN(call67108864, 67108872 )
    452 CALLFN(call134217728, 134217736 )
    453 CALLFN(call268435456, 268435464 )
    454 CALLFN(call536870912, 536870920 )
    455 CALLFN(call1073741824, 1073741832 )
    456 
    457 // bool cas(uint32 *ptr, uint32 old, uint32 new)
    458 // 32-bit compare-and-swap, performed atomically:
    459 //	if(*ptr == old){
    460 //		*ptr = new;
    461 //		return 1;
    462 //	} else
    463 //		return 0;
    464 TEXT runtimecas(SB), NOSPLIT, $0-17
    465 	MOVD	ptr+0(FP), R0
    466 	MOVW	old+8(FP), R1
    467 	MOVW	new+12(FP), R2
    468 retry:
    469 	LDAXRW	(R0), R3		// exclusive load of the current value
    470 	CMPW	R1, R3
    471 	BNE	done			// mismatch: leave with the NE flags set
    472 	STLXRW	R2, (R0), R3		// exclusive store of new; R3 = status
    473 	CBNZ	R3, retry		// nonzero status: the store did not happen
    474 done:
    475 	CSET	EQ, R0			// 1 when the CMPW above matched, else 0
    476 	MOVB	R0, ret+16(FP)
    477 	RET
    478 
    479 TEXT runtimecasuintptr(SB), NOSPLIT, $0-25
    480 	B	runtimecas64(SB)	// tail-jump: handled by the 64-bit compare-and-swap
    481 
    482 TEXT runtimeatomicloaduintptr(SB), NOSPLIT, $-8-16
    483 	B	runtimeatomicload64(SB)	// tail-jump: handled by the 64-bit atomic load
    484 
    485 TEXT runtimeatomicloaduint(SB), NOSPLIT, $-8-16
    486 	B	runtimeatomicload64(SB)	// tail-jump: handled by the 64-bit atomic load
    487 
    488 TEXT runtimeatomicstoreuintptr(SB), NOSPLIT, $0-16
    489 	B	runtimeatomicstore64(SB)	// tail-jump: handled by the 64-bit atomic store
    490 
    491 // AES hashing not implemented for ARM64, issue #10109.
        // Each of the four entry points below loads from address 0, which is
        // meant to fault. None of them has a RET: each body is followed
        // directly by the next TEXT.
    492 TEXT runtimeaeshash(SB),NOSPLIT,$-8-0
    493 	MOVW	$0, R0
    494 	MOVW	(R0), R1		// deliberate load from address 0
    495 TEXT runtimeaeshash32(SB),NOSPLIT,$-8-0
    496 	MOVW	$0, R0
    497 	MOVW	(R0), R1		// deliberate load from address 0
    498 TEXT runtimeaeshash64(SB),NOSPLIT,$-8-0
    499 	MOVW	$0, R0
    500 	MOVW	(R0), R1		// deliberate load from address 0
    501 TEXT runtimeaeshashstr(SB),NOSPLIT,$-8-0
    502 	MOVW	$0, R0
    503 	MOVW	(R0), R1		// deliberate load from address 0
    504 
    505 // bool casp(void **val, void *old, void *new)
    506 // Atomically:
    507 //	if(*val == old){
    508 //		*val = new;
    509 //		return 1;
    510 //	} else
    511 //		return 0;
        // Implemented as a tail-jump to the 64-bit compare-and-swap.
    512 TEXT runtimecasp1(SB), NOSPLIT, $0-25
    513 	B runtimecas64(SB)
    514 
    515 TEXT runtimeprocyield(SB),NOSPLIT,$0-0
        	// Executes YIELD once per requested cycle. The counter is
        	// decremented before it is tested, so the loop body always runs
        	// at least once.
    516 	MOVWU	cycles+0(FP), R0
    517 spin:
    518 	YIELD
    519 	SUBW	$1, R0
    520 	CBNZ	R0, spin		// keep spinning until the counter reaches zero
    521 	RET
    522 
    523 // void jmpdefer(fv, sp);
    524 // called from deferreturn.
    525 // 1. grab stored LR for caller
    526 // 2. sub 4 bytes to get back to BL deferreturn
    527 // 3. BR to fn
        // The deferred function is entered with LR pointing at that BL, so when
        // it returns the BL instruction executes again.
    528 TEXT runtimejmpdefer(SB), NOSPLIT, $-8-16
    529 	MOVD	0(RSP), R0
    530 	SUB	$4, R0			// one instruction back
    531 	MOVD	R0, LR
    532 
    533 	MOVD	fv+0(FP), R26		// R26 = closure (context register)
    534 	MOVD	argp+8(FP), R0
    535 	MOVD	R0, RSP
    536 	SUB	$8, RSP			// RSP = argp - 8
    537 	MOVD	0(R26), R3		// code pointer
    538 	B	(R3)
    539 
    540 // Save state of caller into g->sched. Smashes R0.
        // Stores the return address (LR) as pc and the current RSP as sp, and
        // clears the lr, ret and ctxt fields.
    541 TEXT gosave<>(SB),NOSPLIT,$-8
    542 	MOVD	LR, (g_sched+gobuf_pc)(g)
    543 	MOVD RSP, R0
    544 	MOVD	R0, (g_sched+gobuf_sp)(g)
    545 	MOVD	$0, (g_sched+gobuf_lr)(g)
    546 	MOVD	$0, (g_sched+gobuf_ret)(g)
    547 	MOVD	$0, (g_sched+gobuf_ctxt)(g)
    548 	RET
    549 
    550 // func asmcgocall(fn, arg unsafe.Pointer) int32
    551 // Call fn(arg) on the scheduler stack,
    552 // aligned appropriately for the gcc ABI.
    553 // See cgocall.go for more details.
        // Register roles: R1 = fn, R0 = arg, R2 = RSP on entry, R4 = g on entry,
        // R9 = scratch that carries R0 across calls which may clobber it.
        // The 32-bit result of fn is stored in ret+16(FP).
    554 TEXT asmcgocall(SB),NOSPLIT,$0-20
    555 	MOVD	fn+0(FP), R1
    556 	MOVD	arg+8(FP), R0
    557 
    558 	MOVD	RSP, R2		// save original stack pointer
    559 	MOVD	g, R4
    560 
    561 	// Figure out if we need to switch to m->g0 stack.
    562 	// We get called to create new OS threads too, and those
    563 	// come in on the m->g0 stack already.
    564 	MOVD	g_m(g), R8
    565 	MOVD	m_g0(R8), R3
    566 	CMP	R3, g
    567 	BEQ	g0
    568 	MOVD	R0, R9	// gosave<> and save_g might clobber R0
    569 	BL	gosave<>(SB)
    570 	MOVD	R3, g
    571 	BL	runtimesave_g(SB)
    572 	MOVD	(g_sched+gobuf_sp)(g), R0
    573 	MOVD	R0, RSP
    574 	MOVD	R9, R0
    575 
    576 	// Now on a scheduling stack (a pthread-created stack).
    577 g0:
    578 	// Save room for two of our pointers /*, plus 32 bytes of callee
    579 	// save area that lives on the caller stack. */
    580 	MOVD	RSP, R13
    581 	SUB	$16, R13
    582 	MOVD	R13, RSP
    583 	MOVD	R4, 0(RSP)	// save old g on stack
    584 	MOVD	(g_stack+stack_hi)(R4), R4
    585 	SUB	R2, R4		// R4 = old g's stack.hi - original RSP
    586 	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
    587 	BL	(R1)
    588 	MOVD	R0, R9		// park fn's result while g is restored
    589 
    590 	// Restore g, stack pointer.  R0 is errno, so don't touch it
    591 	MOVD	0(RSP), g
    592 	BL	runtimesave_g(SB)
    593 	MOVD	(g_stack+stack_hi)(g), R5
    594 	MOVD	8(RSP), R6
    595 	SUB	R6, R5		// RSP = stack.hi - saved depth
    596 	MOVD	R9, R0
    597 	MOVD	R5, RSP
    598 
    599 	MOVW	R0, ret+16(FP)
    600 	RET
    601 
    602 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
    603 // Turn the fn into a Go func (by taking its address) and call
    604 // cgocallback_gofunc.
        // The three outgoing arguments go in 8(RSP), 16(RSP) and 24(RSP); the
        // call is made indirectly through R0.
    605 TEXT runtimecgocallback(SB),NOSPLIT,$24-24
    606 	MOVD	$fn+0(FP), R0		// address of the fn argument slot, not its value
    607 	MOVD	R0, 8(RSP)
    608 	MOVD	frame+8(FP), R0
    609 	MOVD	R0, 16(RSP)
    610 	MOVD	framesize+16(FP), R0
    611 	MOVD	R0, 24(RSP)
    612 	MOVD	$runtimecgocallback_gofunc(SB), R0
    613 	BL	(R0)
    614 	RET
    615 
    616 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
    617 // See cgocall.go for more details.
        //
        // Starts on the calling thread's stack. When the thread has no g, an m
        // is borrowed through needm. The routine then switches to m->curg, calls
        // cgocallbackg, restores the saved scheduling state, and hands a
        // borrowed m back through dropm.
        // Frame slots:
        //	savedm-8(SP)	m found on entry, or 0 when an m had to be borrowed
        //	savedsp-16(SP)	previous value of m->g0->sched.sp
    618 TEXT cgocallback_gofunc(SB),NOSPLIT,$24-24
    619 	NO_LOCAL_POINTERS
    620 
    621 	// Load g from thread-local storage.
    622 	MOVB	runtimeiscgo(SB), R3
    623 	CMP	$0, R3
    624 	BEQ	nocgo
    625 	BL	runtimeload_g(SB)
    626 nocgo:
    627 
    628 	// If g is nil, Go did not create the current thread.
    629 	// Call needm to obtain one for temporary use.
    630 	// In this case, we're running on the thread stack, so there's
    631 	// lots of space, but the linker doesn't know. Hide the call from
    632 	// the linker analysis by using an indirect call.
    633 	CMP	$0, g
    634 	BEQ	needm
        	// The thread already has an m: record it so that the epilogue
        	// leaves it alone, and skip the borrowing path entirely.
        	MOVD	g_m(g), R8
        	MOVD	R8, savedm-8(SP)
        	B	havem
        needm:
        	// savedm must remain zero on this path: that is what tells the
        	// epilogue the m was borrowed and has to be returned via dropm.
    635 	MOVD	g, savedm-8(SP) // g is zero, so is m.
    636 	MOVD	$runtimeneedm(SB), R0
    637 	BL	(R0)
    638 
    639 	// Set m->sched.sp = SP, so that if a panic happens
    640 	// during the function we are about to execute, it will
    641 	// have a valid SP to run on the g0 stack.
    642 	// The next few lines (after the havem label)
    643 	// will save this SP onto the stack and then write
    644 	// the same SP back to m->sched.sp. That seems redundant,
    645 	// but if an unrecovered panic happens, unwindm will
    646 	// restore the g->sched.sp from the stack location
    647 	// and then systemstack will try to use it. If we don't set it here,
    648 	// that restored SP will be uninitialized (typically 0) and
    649 	// will not be usable.
    650 	MOVD	g_m(g), R8
    651 	MOVD	m_g0(R8), R3
    652 	MOVD	RSP, R0
    653 	MOVD	R0, (g_sched+gobuf_sp)(R3)
    654 
    655 havem:
        	// R8 = m on both paths into this label.
    658 	// Now there's a valid m, and we're running on its m->g0.
    659 	// Save current m->g0->sched.sp on stack and then set it to SP.
    660 	// Save current sp in m->g0->sched.sp in preparation for
    661 	// switch back to m->curg stack.
    662 	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
    663 	// Beware that the frame size is actually 32.
    664 	MOVD	m_g0(R8), R3
    665 	MOVD	(g_sched+gobuf_sp)(R3), R4
    666 	MOVD	R4, savedsp-16(SP)
    667 	MOVD	RSP, R0
    668 	MOVD	R0, (g_sched+gobuf_sp)(R3)
    669 
    670 	// Switch to m->curg stack and call runtime.cgocallbackg.
    671 	// Because we are taking over the execution of m->curg
    672 	// but *not* resuming what had been running, we need to
    673 	// save that information (m->curg->sched) so we can restore it.
    674 	// We can restore m->curg->sched.sp easily, because calling
    675 	// runtime.cgocallbackg leaves SP unchanged upon return.
    676 	// To save m->curg->sched.pc, we push it onto the stack.
    677 	// This has the added benefit that it looks to the traceback
    678 	// routine like cgocallbackg is going to return to that
    679 	// PC (because the frame we allocate below has the same
    680 	// size as cgocallback_gofunc's frame declared above)
    681 	// so that the traceback will seamlessly trace back into
    682 	// the earlier calls.
    683 	//
    684 	// In the new goroutine, -16(SP) and -8(SP) are unused.
    685 	MOVD	m_curg(R8), g
    686 	BL	runtimesave_g(SB)
    687 	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
    688 	MOVD	(g_sched+gobuf_pc)(g), R5
    689 	MOVD	R5, -(24+8)(R4)	// maintain 16-byte SP alignment
    690 	MOVD	$-(24+8)(R4), R0
    691 	MOVD	R0, RSP
    692 	BL	runtimecgocallbackg(SB)
    693 
    694 	// Restore g->sched (== m->curg->sched) from saved values.
    695 	MOVD	0(RSP), R5
    696 	MOVD	R5, (g_sched+gobuf_pc)(g)
    697 	MOVD	RSP, R4
    698 	ADD	$(24+8), R4, R4
    699 	MOVD	R4, (g_sched+gobuf_sp)(g)
    700 
    701 	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    702 	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    703 	// so we do not have to restore it.)
    704 	MOVD	g_m(g), R8
    705 	MOVD	m_g0(R8), g
    706 	BL	runtimesave_g(SB)
    707 	MOVD	(g_sched+gobuf_sp)(g), R0
    708 	MOVD	R0, RSP
    709 	MOVD	savedsp-16(SP), R4
    710 	MOVD	R4, (g_sched+gobuf_sp)(g)
    711 
    712 	// If the m on entry was nil, we called needm above to borrow an m
    713 	// for the duration of the call. Since the call is over, return it with dropm.
    714 	MOVD	savedm-8(SP), R6
    715 	CMP	$0, R6
    716 	BNE	droppedm		// an m existed on entry: nothing to return
    717 	MOVD	$runtimedropm(SB), R0
    718 	BL	(R0)
    719 droppedm:
    720 
    721 	// Done!
    722 	RET
    723 
    724 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
    725 // Must obey the gcc calling convention.
        // The result is returned in R0. g and R27 are saved in the local frame
        // and restored before returning, so the caller sees them unchanged.
    726 TEXT _cgo_topofstack(SB),NOSPLIT,$24
    727 	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
    728 	// are callee-save in the gcc calling convention, so save them.
    729 	MOVD	R27, savedR27-8(SP)
    730 	MOVD	g, saveG-16(SP)
    731 
    732 	BL	runtimeload_g(SB)
    733 	MOVD	g_m(g), R0
    734 	MOVD	m_curg(R0), R0
    735 	MOVD	(g_stack+stack_hi)(R0), R0
    736 
    737 	MOVD	saveG-16(SP), g
    738 	MOVD	savedR27-8(SP), R27	// same slot and name as the save above
    739 	RET
    740 
    741 // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and publishes it via save_g.
    742 TEXT runtimesetg(SB), NOSPLIT, $0-8
    743 	MOVD	gg+0(FP), g
    744 	// This only happens if iscgo, so jump straight to save_g
    745 	BL	runtimesave_g(SB)
    746 	RET
    747 
    748 // void setg_gcc(G*); set g called from gcc
        // The new g arrives in R0. R27 is saved around the save_g call and
        // restored afterwards, so the caller sees it unchanged.
    749 TEXT setg_gcc<>(SB),NOSPLIT,$8
    750 	MOVD	R0, g
    751 	MOVD	R27, savedR27-8(SP)
    752 	BL	runtimesave_g(SB)
    753 	MOVD	savedR27-8(SP), R27
    754 	RET
    755 
    756 TEXT runtimegetcallerpc(SB),NOSPLIT,$8-16
        	// Returns the return address saved by the caller. When that
        	// address equals stackBarrierPC, the value produced by
        	// nextBarrierPC is returned instead.
    757 	MOVD	16(RSP), R0		// LR saved by caller
    758 	MOVD	runtimestackBarrierPC(SB), R1
    759 	CMP	R0, R1
    760 	BNE	nobar
    761 	// Get original return PC.
    762 	BL	runtimenextBarrierPC(SB)
    763 	MOVD	8(RSP), R0		// result slot of nextBarrierPC
    764 nobar:
    765 	MOVD	R0, ret+8(FP)
    766 	RET
    767 
    768 TEXT runtimesetcallerpc(SB),NOSPLIT,$8-16
        	// Replaces the return address saved by the caller with pc. When
        	// the saved address equals stackBarrierPC, the slot is left alone
        	// and pc is passed to setNextBarrierPC instead.
    769 	MOVD	pc+8(FP), R0
    770 	MOVD	16(RSP), R1		// current saved LR of the caller
    771 	MOVD	runtimestackBarrierPC(SB), R2
    772 	CMP	R1, R2
    773 	BEQ	setbar
    774 	MOVD	R0, 16(RSP)		// set LR in caller
    775 	RET
    776 setbar:
    777 	// Set the stack barrier return PC.
    778 	MOVD	R0, 8(RSP)		// argument slot for setNextBarrierPC
    779 	BL	runtimesetNextBarrierPC(SB)
    780 	RET
    781 
    782 TEXT runtimegetcallersp(SB),NOSPLIT,$0-16
    783 	MOVD	argp+0(FP), R0
    784 	SUB	$8, R0			// result = argp - 8
    785 	MOVD	R0, ret+8(FP)
    786 	RET
    787 
    788 TEXT runtimeabort(SB),NOSPLIT,$-8-0
    789 	B	(ZR)			// branch to address 0 to crash
    790 	UNDEF
    791 
    792 // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    793 // redirects to memhash(p, h, size) using the size
    794 // stored in the closure.
    795 TEXT runtimememhash_varlen(SB),NOSPLIT,$40-24
    796 	GO_ARGS
    797 	NO_LOCAL_POINTERS
    798 	MOVD	p+0(FP), R3
    799 	MOVD	h+8(FP), R4
    800 	MOVD	8(R26), R5		// size, read at offset 8 of the closure in R26
    801 	MOVD	R3, 8(RSP)		// memhash argument 1: p
    802 	MOVD	R4, 16(RSP)		// memhash argument 2: h
    803 	MOVD	R5, 24(RSP)		// memhash argument 3: size
    804 	BL	runtimememhash(SB)
    805 	MOVD	32(RSP), R3		// memhash result
    806 	MOVD	R3, ret+16(FP)
    807 	RET
    808 
    809 TEXT runtimememeq(SB),NOSPLIT,$-8-25
        	// Byte-wise equality of two regions of `size` bytes. The result
        	// slot is preset to 1 and overwritten with 0 at the first
        	// mismatching pair of bytes.
    810 	MOVD	a+0(FP), R1
    811 	MOVD	b+8(FP), R2
    812 	MOVD	size+16(FP), R3
    813 	ADD	R3, R1, R6		// R6 = a + size: one past the last byte of a
    814 	MOVD	$1, R0
    815 	MOVB	R0, ret+24(FP)		// assume equal until proven otherwise
    816 nextbyte:
    817 	CMP	R1, R6
    818 	BEQ	out			// every byte matched
    819 	MOVBU.P	1(R1), R4		// R4 = *a++
    820 	MOVBU.P	1(R2), R5		// R5 = *b++
    821 	CMP	R4, R5
    822 	BEQ	nextbyte
    823 
    824 	MOVB	ZR, ret+24(FP)		// mismatch: report false
    825 out:
    826 	RET
    827 
    828 // memequal_varlen(a, b unsafe.Pointer) bool
        // Returns true at once when a and b are the same pointer; otherwise it
        // calls memeq(a, b, size) with the size taken from the closure in R26.
    829 TEXT runtimememequal_varlen(SB),NOSPLIT,$40-17
    830 	MOVD	a+0(FP), R3
    831 	MOVD	b+8(FP), R4
    832 	CMP	R3, R4
    833 	BEQ	eq
    834 	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
    835 	MOVD	R3, 8(RSP)
    836 	MOVD	R4, 16(RSP)
    837 	MOVD	R5, 24(RSP)
    838 	BL	runtimememeq(SB)
    839 	MOVBU	32(RSP), R3		// memeq's boolean result
    840 	MOVB	R3, ret+16(FP)
    841 	RET
    842 eq:
    843 	MOVD	$1, R3
    844 	MOVB	R3, ret+16(FP)
    845 	RET
    846 
    847 TEXT runtimecmpstring(SB),NOSPLIT,$-4-40
        	// Loads both string headers into the registers cmpbody expects and
        	// tail-jumps to it; cmpbody stores the result through R7.
    848 	MOVD	s1_base+0(FP), R2
    849 	MOVD	s1_len+8(FP), R0
    850 	MOVD	s2_base+16(FP), R3
    851 	MOVD	s2_len+24(FP), R1
    852 	ADD	$40, RSP, R7		// R7 = RSP + 40: where cmpbody writes the result
    853 	B	runtimecmpbody<>(SB)
    854 
    855 TEXT bytesCompare(SB),NOSPLIT,$-4-56
        	// Loads pointer and length of both slices into the registers
        	// cmpbody expects and tail-jumps to it; cmpbody stores the result
        	// through R7.
    856 	MOVD	s1+0(FP), R2
    857 	MOVD	s1+8(FP), R0
    858 	MOVD	s2+24(FP), R3
    859 	MOVD	s2+32(FP), R1
    860 	ADD	$56, RSP, R7		// R7 = RSP + 56: where cmpbody writes the result
    861 	B	runtimecmpbody<>(SB)
    862 
    863 // On entry:
    864 // R0 is the length of s1
    865 // R1 is the length of s2
    866 // R2 points to the start of s1
    867 // R3 points to the start of s2
    868 // R7 points to return value (-1/0/1 will be written here)
    869 //
    870 // On exit:
    871 // R4, R5, and R6 are clobbered
        // (R2 and R3 are also advanced past the bytes that were compared.)
        //
        // The first min(len) bytes are compared as unsigned values; when they
        // are all equal the result is decided by the lengths.
    872 TEXT runtimecmpbody<>(SB),NOSPLIT,$-4-0
    873 	CMP	R0, R1
    874 	CSEL    LT, R1, R0, R6 // R6 is min(R0, R1)
    875 
    876 	ADD	R2, R6	// R2 is current byte in s1, R6 is last byte in s1 to compare
    877 loop:
    878 	CMP	R2, R6
    879 	BEQ	samebytes // all compared bytes were the same; compare lengths
    880 	MOVBU.P	1(R2), R4
    881 	MOVBU.P	1(R3), R5
    882 	CMP	R4, R5
    883 	BEQ	loop
    884 	// bytes differed
        	// The flags from the CMP above are still live: LT means the s2
        	// byte is smaller, so the result is +1; otherwise it is -1.
    885 	MOVD	$1, R4
    886 	CSNEG	LT, R4, R4, R4
    887 	MOVD	R4, (R7)
    888 	RET
    889 samebytes:
    890 	MOVD	$1, R4
    891 	CMP	R0, R1
    892 	CSNEG	LT, R4, R4, R4		// s2 shorter: +1, otherwise -1
    893 	CSEL	EQ, ZR, R4, R4		// equal lengths: 0
    894 	MOVD	R4, (R7)
    895 	RET
    896 
    897 // eqstring reports whether two strings hold the same bytes.
    898 // Only the first string's length is consulted: the compiler
    899 // guarantees that strings passed to eqstring have equal length.
    900 // See runtime_test.go:eqstring_generic for
    901 // equivalent Go code.
    902 TEXT runtimeeqstring(SB),NOSPLIT,$0-33
    903 	MOVD	s1str+0(FP), R0
    904 	MOVD	s1len+8(FP), R1
    905 	MOVD	s2str+16(FP), R2
    906 	ADD	R0, R1		// R1 = one past the last byte of s1
    907 next:
    908 	CMP	R0, R1
    909 	BEQ	same		// ran off the end without a mismatch
    910 	MOVBU.P	1(R0), R4	// R4 = next byte of s1
    911 	MOVBU.P	1(R2), R5	// R5 = next byte of s2
    912 	CMP	R4, R5
    913 	BEQ	next
    914 differ:
    915 	MOVB	$0, ret+32(FP)
    916 	RET
    917 same:
    918 	MOVD	$1, R0
    919 	MOVB	R0, ret+32(FP)
    920 	RET
    921 
    922 //
    923 // functions for other packages
    924 //
    925 TEXT bytesIndexByte(SB),NOSPLIT,$0-40
        	// Linear scan of a byte slice for c. Stores the index of the
        	// first occurrence, or -1 when c does not occur.
    926 	MOVD	b+0(FP), R0
    927 	MOVD	b_len+8(FP), R1
    928 	MOVBU	c+24(FP), R2	// the byte being searched for
    929 	MOVD	R0, R4		// remember the base pointer
    930 	ADD	R0, R1		// R1 = one past the last byte
    931 scan:
    932 	CMP	R0, R1
    933 	BEQ	missing
    934 	MOVBU.P	1(R0), R3	// R3 = *R0++
    935 	CMP	R2, R3
    936 	BNE	scan
    937 
    938 	SUB	$1, R0		// the post-increment left R0 one past the match
    939 	SUB	R4, R0		// pointer -> index
    940 	MOVD	R0, ret+32(FP)
    941 	RET
    942 
    943 missing:
    944 	MOVD	$-1, R0
    945 	MOVD	R0, ret+32(FP)
    946 	RET
    947 
    948 TEXT stringsIndexByte(SB),NOSPLIT,$0-32
        	// Linear scan of a string for c. Stores the index of the first
        	// occurrence, or -1 when c does not occur.
    949 	MOVD	s+0(FP), R0
    950 	MOVD	s_len+8(FP), R1
    951 	MOVBU	c+16(FP), R2	// the byte being searched for
    952 	MOVD	R0, R4		// remember the base pointer
    953 	ADD	R0, R1		// R1 = one past the last byte
    954 scan:
    955 	CMP	R0, R1
    956 	BEQ	missing
    957 	MOVBU.P	1(R0), R3	// R3 = *R0++
    958 	CMP	R2, R3
    959 	BNE	scan
    960 
    961 	SUB	$1, R0		// the post-increment left R0 one past the match
    962 	SUB	R4, R0		// pointer -> index
    963 	MOVD	R0, ret+24(FP)
    964 	RET
    965 
    966 missing:
    967 	MOVD	$-1, R0
    968 	MOVD	R0, ret+24(FP)
    969 	RET
    970 
    970 // TODO: share code with memeq?
        // Reports whether two byte slices have the same length and contents.
    971 TEXT bytesEqual(SB),NOSPLIT,$0-49
    972 	MOVD	a_len+8(FP), R1
    973 	MOVD	b_len+32(FP), R3
    974 	CMP	R1, R3		// differing lengths can never be equal
    975 	BNE	differ
    976 	MOVD	a+0(FP), R0
    977 	MOVD	b+24(FP), R2
    978 	ADD	R0, R1		// R1 = one past the last byte of a
    979 next:
    980 	CMP	R0, R1
    981 	BEQ	same		// ran off the end without a mismatch
    982 	MOVBU.P	1(R0), R4	// R4 = next byte of a
    983 	MOVBU.P	1(R2), R5	// R5 = next byte of b
    984 	CMP	R4, R5
    985 	BEQ	next
    986 differ:
    987 	MOVB	$0, ret+48(FP)
    988 	RET
    989 same:
    990 	MOVD	$1, R0
    991 	MOVB	R0, ret+48(FP)
    992 	RET
    994 
    995 TEXT runtimefastrand1(SB),NOSPLIT,$-8-4
        	// Advances the per-m generator state m.fastrand and returns the
        	// new value: x += x; if int32(x) < 0 { x ^= 0x88888eef }.
    996 	MOVD	g_m(g), R1
    997 	MOVWU	m_fastrand(R1), R0
    998 	ADD	R0, R0			// x += x
    999 	CMPW	$0, R0			// 32-bit signed test of the doubled value
   1000 	BGE	notneg
   1001 	EOR	$0x88888eef, R0
   1002 notneg:
   1003 	MOVW	R0, m_fastrand(R1)
   1004 	MOVW	R0, ret+0(FP)
   1005 	RET
   1006 
   1007 TEXT runtimereturn0(SB), NOSPLIT, $0
   1008 	MOVW	$0, R0			// leave 0 in R0 for the caller
   1009 	RET
   1010 
   1011 // The top-most function running on a goroutine
   1012 // returns to goexit+PCQuantum.
        // The leading NOP is the instruction skipped by that offset, so such a
        // return lands on the BL to goexit1.
   1013 TEXT runtimegoexit(SB),NOSPLIT,$-8-0
   1014 	MOVD	R0, R0	// NOP
   1015 	BL	runtimegoexit1(SB)	// does not return
   1016 
   1016 // TODO(aram): use PRFM here.
        // The prefetch entry points are currently stubs that return at once.
   1017 TEXT runtimeprefetcht0(SB),NOSPLIT,$0-8
   1018 	RET
   1020 
   1020 TEXT runtimeprefetcht1(SB),NOSPLIT,$0-8
   1021 	RET			// stub: no prefetch is issued
   1023 
   1023 TEXT runtimeprefetcht2(SB),NOSPLIT,$0-8
   1024 	RET			// stub: no prefetch is issued
   1026 
   1026 TEXT runtimeprefetchnta(SB),NOSPLIT,$0-8
   1027 	RET			// stub: no prefetch is issued
   1029 
   1030