Home | History | Annotate | Download | only in runtime
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "go_asm.h"
      6 #include "go_tls.h"
      7 #include "funcdata.h"
      8 #include "textflag.h"
      9 
     10 TEXT runtimert0_go(SB),NOSPLIT,$0
     11 	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
     12 	// C TLS base pointer in AR0:AR1
        	//
        	// Process entry: saves argc/argv, sets up g0 and m0 on the stack handed
        	// over by the operating system, optionally calls _cgo_init, then runs
        	// check/args/osinit/schedinit, queues the main goroutine with newproc
        	// and finally calls mstart.
        	// NOTE(review): symbol names in this listing (runtimert0_go, runtimeg0, ...)
        	// look as if a package separator was lost during extraction - confirm
        	// against the original source.
     13 
     14 	// initialize essential registers
     15 	XOR	R0, R0
     16 
     17 	SUB	$24, R15
     18 	MOVW	R2, 8(R15) // argc
     19 	MOVD	R3, 16(R15) // argv
     20 
     21 	// create istack out of the given (operating system) stack.
     22 	// _cgo_init may update stackguard.
        	// g0.stack.hi = current SP, g0.stack.lo = SP - 64KB; both stack guards
        	// are provisionally set to stack.lo and recomputed after the nocgo label.
     23 	MOVD	$runtimeg0(SB), g
     24 	MOVD	R15, R11
     25 	SUB	$(64*1024), R11
     26 	MOVD	R11, g_stackguard0(g)
     27 	MOVD	R11, g_stackguard1(g)
     28 	MOVD	R11, (g_stack+stack_lo)(g)
     29 	MOVD	R15, (g_stack+stack_hi)(g)
     30 
     31 	// if there is a _cgo_init, call it using the gcc ABI.
     32 	MOVD	_cgo_init(SB), R11
     33 	CMPBEQ	R11, $0, nocgo
     34 	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
     35 	SLD	$32, R4, R4
     36 	MOVW	AR1, R4			// arg 2: TLS base pointer
     37 	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
     38 	MOVD	g, R2			// arg 0: G
     39 	// C functions expect 160 bytes of space on caller stack frame
     40 	// and an 8-byte aligned stack pointer
     41 	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
     42 	SUB	$160, R15		// reserve 160 bytes
     43 	MOVD    $~7, R6
     44 	AND 	R6, R15			// 8-byte align
     45 	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
     46 	MOVD	R9, R15			// restore stack
     47 	XOR	R0, R0			// zero R0
     48 
     49 nocgo:
     50 	// update stackguard after _cgo_init
        	// both guards become g0.stack.lo + _StackGuard
     51 	MOVD	(g_stack+stack_lo)(g), R2
     52 	ADD	$const__StackGuard, R2
     53 	MOVD	R2, g_stackguard0(g)
     54 	MOVD	R2, g_stackguard1(g)
     55 
     56 	// set the per-goroutine and per-mach "registers"
     57 	MOVD	$runtimem0(SB), R2
     58 
     59 	// save m->g0 = g0
     60 	MOVD	g, m_g0(R2)
     61 	// save m0 to g0->m
     62 	MOVD	R2, g_m(g)
     63 
     64 	BL	runtimecheck(SB)
     65 
     66 	// argc/argv are already prepared on stack
     67 	BL	runtimeargs(SB)
     68 	BL	runtimeosinit(SB)
     69 	BL	runtimeschedinit(SB)
     70 
     71 	// create a new goroutine to start program
        	// A temporary 24-byte frame is built for newproc: the address of mainPC
        	// goes at 16(R15) and the words at 8(R15) and 0(R15) are zeroed
        	// (presumably the argument size and the return-address slot - TODO
        	// confirm against newproc's signature).
     72 	MOVD	$runtimemainPC(SB), R2		// entry
     73 	SUB     $24, R15
     74 	MOVD 	R2, 16(R15)
     75 	MOVD 	$0, 8(R15)
     76 	MOVD 	$0, 0(R15)
     77 	BL	runtimenewproc(SB)
     78 	ADD	$24, R15
     79 
     80 	// start this M
     81 	BL	runtimemstart(SB)
     82 
        	// Reached only if mstart returns: store to address 1(R0) with R0 == 0,
        	// presumably to force a fault - TODO confirm.
     83 	MOVD	$0, 1(R0)
     84 	RET
     85 
     86 DATA	runtimemainPC+0(SB)/8,$runtimemain(SB)	// 8-byte word initialised with the address of main
     87 GLOBL	runtimemainPC(SB),RODATA,$8	// read-only; rt0_go hands its address to newproc as the entry
     88 
     89 TEXT runtimebreakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// Stores zero to address 2(R0). R0 is kept at zero elsewhere in this
        	// file, so this is presumably a deliberate low-address store meant to
        	// trap - TODO confirm the intended behaviour.
     90 	MOVD	$0, 2(R0)
     91 	RET
     92 
     93 TEXT runtimeasminit(SB),NOSPLIT|NOFRAME,$0-0
        	// Nothing to initialise here: returns immediately.
     94 	RET
     95 
     96 /*
     97  *  go-routine
     98  */
     99 
    100 // void gosave(Gobuf*)
    101 // save state in Gobuf; setjmp
        // Records SP, LR (as the resume pc) and g in *buf, zeroes buf.lr and
        // buf.ret, and calls badctxt if buf.ctxt is not zero.
    102 TEXT runtimegosave(SB), NOSPLIT, $-8-8
    103 	MOVD	buf+0(FP), R3
    104 	MOVD	R15, gobuf_sp(R3)
    105 	MOVD	LR, gobuf_pc(R3)
    106 	MOVD	g, gobuf_g(R3)
    107 	MOVD	$0, gobuf_lr(R3)
    108 	MOVD	$0, gobuf_ret(R3)
    109 	// Assert ctxt is zero. See func save.
    110 	MOVD	gobuf_ctxt(R3), R3	// R3 is reused: it now holds ctxt, not buf
    111 	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    112 	BL	runtimebadctxt(SB)
    113 	RET
    114 
    115 // void gogo(Gobuf*)
    116 // restore state from Gobuf; longjmp
        // Loads g, SP, LR, the return value (R3) and the context (R12) from *buf,
        // clears the sp/ret/lr/ctxt fields of the buffer and branches to buf.pc.
    117 TEXT runtimegogo(SB), NOSPLIT, $16-8
    118 	MOVD	buf+0(FP), R5
    119 
    120 	// If ctxt is not nil, invoke deletion barrier before overwriting.
    121 	MOVD	gobuf_ctxt(R5), R1
    122 	CMPBEQ	R1, $0, nilctxt
    123 	MOVD	$gobuf_ctxt(R5), R1	// R1 = &buf.ctxt (address, not value)
    124 	MOVD	R1, 8(R15)	// first argument: the slot about to be overwritten
    125 	MOVD	R0, 16(R15)	// second argument: R0, which this file keeps at zero
    126 	BL	runtimewritebarrierptr_prewrite(SB)
    127 	MOVD	buf+0(FP), R5	// reload buf after the call
    128 
    129 nilctxt:
    130 	MOVD	gobuf_g(R5), g	// make sure g is not nil
    131 	BL	runtimesave_g(SB)
    132 
    133 	MOVD	0(g), R4	// load through g; presumably this is what faults when g is nil - TODO confirm
    134 	MOVD	gobuf_sp(R5), R15
    135 	MOVD	gobuf_lr(R5), LR
    136 	MOVD	gobuf_ret(R5), R3
    137 	MOVD	gobuf_ctxt(R5), R12
    138 	MOVD	$0, gobuf_sp(R5)
    139 	MOVD	$0, gobuf_ret(R5)
    140 	MOVD	$0, gobuf_lr(R5)
    141 	MOVD	$0, gobuf_ctxt(R5)
    142 	CMP	R0, R0 // set condition codes for == test, needed by stack split
    143 	MOVD	gobuf_pc(R5), R6
    144 	BR	(R6)
    145 
    146 // void mcall(fn func(*g))
    147 // Switch to m->g0's stack, call fn(g).
    148 // Fn must never return.  It should gogo(&g->sched)
    149 // to keep running g.
    150 TEXT runtimemcall(SB), NOSPLIT, $-8-8
    151 	// Save caller state in g->sched
    152 	MOVD	R15, (g_sched+gobuf_sp)(g)
    153 	MOVD	LR, (g_sched+gobuf_pc)(g)
    154 	MOVD	$0, (g_sched+gobuf_lr)(g)
    155 	MOVD	g, (g_sched+gobuf_g)(g)
    156 
    157 	// Switch to m->g0 & its stack, call fn.
    158 	MOVD	g, R3	// R3 = calling g; becomes fn's argument below
    159 	MOVD	g_m(g), R8
    160 	MOVD	m_g0(R8), g
    161 	BL	runtimesave_g(SB)
    162 	CMP	g, R3
    163 	BNE	2(PC)	// caller was not g0: skip the badmcall branch
    164 	BR	runtimebadmcall(SB)
    165 	MOVD	fn+0(FP), R12			// context
    166 	MOVD	0(R12), R4			// code pointer
    167 	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
    168 	SUB	$16, R15
    169 	MOVD	R3, 8(R15)	// argument for fn: the g that called mcall
    170 	MOVD	$0, 0(R15)	// zero the word at the bottom of the new frame
    171 	BL	(R4)
    172 	BR	runtimebadmcall2(SB)	// reached only if fn returned, which it must not
    173 
    174 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    175 // of the G stack.  We need to distinguish the routine that
    176 // lives at the bottom of the G stack from the one that lives
    177 // at the top of the system stack because the one at the top of
    178 // the system stack terminates the stack walk (see topofstack()).
        // systemstack records this function's address + 16 as the saved pc when it
        // switches stacks.
    179 TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
    180 	UNDEF
    181 	BL	(LR)	// make sure this function is not leaf
    182 	RET
    183 
    184 // func systemstack(fn func())
        // Runs fn on the g0 stack. If the current g is already g0 or gsignal, fn is
        // called directly; if it is m.curg, the routine switches to g0, calls fn and
        // switches back. Any other g is reported through badsystemstack.
    185 TEXT runtimesystemstack(SB), NOSPLIT, $0-8
    186 	MOVD	fn+0(FP), R3	// R3 = fn
    187 	MOVD	R3, R12		// context
    188 	MOVD	g_m(g), R4	// R4 = m
    189 
    190 	MOVD	m_gsignal(R4), R5	// R5 = gsignal
    191 	CMPBEQ	g, R5, noswitch
    192 
    193 	MOVD	m_g0(R4), R5	// R5 = g0
    194 	CMPBEQ	g, R5, noswitch
    195 
    196 	MOVD	m_curg(R4), R6
    197 	CMPBEQ	g, R6, switch
    198 
    199 	// Bad: g is not gsignal, not g0, not curg. What is it?
    200 	// Hide call from linker nosplit analysis.
    201 	MOVD	$runtimebadsystemstack(SB), R3
    202 	BL	(R3)
    203 
    204 switch:
    205 	// save our state in g->sched.  Pretend to
    206 	// be systemstack_switch if the G stack is scanned.
    207 	MOVD	$runtimesystemstack_switch(SB), R6
    208 	ADD	$16, R6	// get past prologue
    209 	MOVD	R6, (g_sched+gobuf_pc)(g)
    210 	MOVD	R15, (g_sched+gobuf_sp)(g)
    211 	MOVD	$0, (g_sched+gobuf_lr)(g)
    212 	MOVD	g, (g_sched+gobuf_g)(g)
    213 
    214 	// switch to g0
    215 	MOVD	R5, g	// R5 still holds g0 from the check above
    216 	BL	runtimesave_g(SB)
    217 	MOVD	(g_sched+gobuf_sp)(g), R3
    218 	// make it look like mstart called systemstack on g0, to stop traceback
    219 	SUB	$8, R3
    220 	MOVD	$runtimemstart(SB), R4
    221 	MOVD	R4, 0(R3)
    222 	MOVD	R3, R15
    223 
    224 	// call target function
    225 	MOVD	0(R12), R3	// code pointer
    226 	BL	(R3)
    227 
    228 	// switch back to g
    229 	MOVD	g_m(g), R3
    230 	MOVD	m_curg(R3), g
    231 	BL	runtimesave_g(SB)
    232 	MOVD	(g_sched+gobuf_sp)(g), R15
    233 	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp now that it has been restored
    234 	RET
    235 
    236 noswitch:
    237 	// already on m stack, just call directly
    238 	MOVD	0(R12), R3	// code pointer
    239 	BL	(R3)
    240 	RET
    241 
    242 /*
    243  * support for morestack
    244  */
    245 
    246 // Called during function prolog when more stack is needed.
    247 // Caller has already loaded:
    248 // R3: framesize, R4: argsize, R5: LR
    249 //
    250 // The traceback routines see morestack on a g0 as being
    251 // the top of a stack (for example, morestack calling newstack
    252 // calling the scheduler calling newm calling gc), so we must
    253 // record an argument size. For that purpose, it has no arguments.
        //
        // R12 is passed on to newstack as its ctxt argument.
    254 TEXT runtimemorestack(SB),NOSPLIT|NOFRAME,$0-0
    255 	// Cannot grow scheduler stack (m->g0).
    256 	MOVD	g_m(g), R7
    257 	MOVD	m_g0(R7), R8
    258 	CMPBNE	g, R8, 3(PC)	// not g0: skip the two error calls
    259 	BL	runtimebadmorestackg0(SB)
    260 	BL	runtimeabort(SB)
    261 
    262 	// Cannot grow signal stack (m->gsignal).
    263 	MOVD	m_gsignal(R7), R8
    264 	CMP	g, R8
    265 	BNE	3(PC)	// not gsignal: skip the two error calls
    266 	BL	runtimebadmorestackgsignal(SB)
    267 	BL	runtimeabort(SB)
    268 
    269 	// Called from f.
    270 	// Set g->sched to context in f.
    271 	MOVD	R15, (g_sched+gobuf_sp)(g)
    272 	MOVD	LR, R8
    273 	MOVD	R8, (g_sched+gobuf_pc)(g)
    274 	MOVD	R5, (g_sched+gobuf_lr)(g)
    275 	// newstack will fill gobuf.ctxt.
    276 
    277 	// Called from f.
    278 	// Set m->morebuf to f's caller.
    279 	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
    280 	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
    281 	MOVD	g, (m_morebuf+gobuf_g)(R7)
    282 
    283 	// Call newstack on m->g0's stack.
    284 	MOVD	m_g0(R7), g
    285 	BL	runtimesave_g(SB)
    286 	MOVD	(g_sched+gobuf_sp)(g), R15
    287 	// Create a stack frame on g0 to call newstack.
    288 	MOVD	$0, -16(R15)	// Zero saved LR in frame
    289 	SUB	$16, R15
    290 	MOVD	R12, 8(R15)	// ctxt argument
    291 	BL	runtimenewstack(SB)
    292 
    293 	// Not reached, but make sure the return PC from the call to newstack
    294 	// is still in this function, and not the beginning of the next.
    295 	UNDEF
    296 
    297 TEXT runtimemorestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Entry point for callers without a context: zeroes R12, which
        	// morestack stores as newstack's ctxt argument, then branches to morestack.
    298 	MOVD	$0, R12
    299 	BR	runtimemorestack(SB)
    300 
    301 TEXT runtimestackBarrier(SB),NOSPLIT,$0
    302 	// We came here via a RET to an overwritten LR.
    303 	// R3 may be live. Other registers are available.
    304 
    305 	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    306 	MOVD	(g_stkbar+slice_array)(g), R4	// R4 = base of the stkbar array
    307 	MOVD	g_stkbarPos(g), R5	// R5 = current index
    308 	MOVD	$stkbar__size, R6
    309 	MULLD	R5, R6	// R6 = index * element size
    310 	ADD	R4, R6	// R6 = &stkbar[index]
    311 	MOVD	stkbar_savedLRVal(R6), R6
    312 	// Record that this stack barrier was hit.
    313 	ADD	$1, R5
    314 	MOVD	R5, g_stkbarPos(g)
    315 	// Jump to the original return PC.
    316 	BR	(R6)
    317 
    318 // reflectcall: call a function with the given argument list
    319 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    320 // we don't have variable-sized frames, so we use a small number
    321 // of constant-sized-frame functions to encode a few bits of size in the pc.
    322 // Caution: ugly multiline assembly macros in your future!
        //
        // DISPATCH(NAME, MAXSIZE) expects the argument size in R3. If R3 is greater
        // than MAXSIZE it skips the last two instructions and falls through to
        // whatever follows the macro; otherwise it branches to NAME through R5.
        // R4 and R5 are overwritten.
    323 
    324 #define DISPATCH(NAME,MAXSIZE)		\
    325 	MOVD	$MAXSIZE, R4;		\
    326 	CMP	R3, R4;		\
    327 	BGT	3(PC);			\
    328 	MOVD	$NAME(SB), R5;	\
    329 	BR	(R5)
    330 // Note: can't just "BR NAME(SB)" - bad inlining results.
    331 
    332 TEXT reflectcall(SB), NOSPLIT, $0-0
        	// Trampoline: branches straight to reflectcall.
        	// NOTE(review): in this listing the branch target has the same name as
        	// this symbol; presumably the two names differed in the original source
        	// (package separator lost in extraction) - confirm.
    333 	BR	reflectcall(SB)
    334 
    335 TEXT reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize (zero-extended) into R3 and walks the DISPATCH
        	// chain in increasing size order, so control reaches the first callN
        	// whose frame size is >= argsize. If argsize exceeds the largest size,
        	// control branches to badreflectcall.
    336 	MOVWZ argsize+24(FP), R3
    337 	DISPATCH(runtimecall32, 32)
    338 	DISPATCH(runtimecall64, 64)
    339 	DISPATCH(runtimecall128, 128)
    340 	DISPATCH(runtimecall256, 256)
    341 	DISPATCH(runtimecall512, 512)
    342 	DISPATCH(runtimecall1024, 1024)
    343 	DISPATCH(runtimecall2048, 2048)
    344 	DISPATCH(runtimecall4096, 4096)
    345 	DISPATCH(runtimecall8192, 8192)
    346 	DISPATCH(runtimecall16384, 16384)
    347 	DISPATCH(runtimecall32768, 32768)
    348 	DISPATCH(runtimecall65536, 65536)
    349 	DISPATCH(runtimecall131072, 131072)
    350 	DISPATCH(runtimecall262144, 262144)
    351 	DISPATCH(runtimecall524288, 524288)
    352 	DISPATCH(runtimecall1048576, 1048576)
    353 	DISPATCH(runtimecall2097152, 2097152)
    354 	DISPATCH(runtimecall4194304, 4194304)
    355 	DISPATCH(runtimecall8388608, 8388608)
    356 	DISPATCH(runtimecall16777216, 16777216)
    357 	DISPATCH(runtimecall33554432, 33554432)
    358 	DISPATCH(runtimecall67108864, 67108864)
    359 	DISPATCH(runtimecall134217728, 134217728)
    360 	DISPATCH(runtimecall268435456, 268435456)
    361 	DISPATCH(runtimecall536870912, 536870912)
    362 	DISPATCH(runtimecall1073741824, 1073741824)
    363 	MOVD	$runtimebadreflectcall(SB), R5
    364 	BR	(R5)
    365 
    366 #define CALLFN(NAME,MAXSIZE)			\
    367 TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
    368 	NO_LOCAL_POINTERS;			\
    369 	/* copy arguments to stack: R4 = source, R5 = bytes left, R6 = destination */		\
    370 	MOVD	arg+16(FP), R4;			\
    371 	MOVWZ	argsize+24(FP), R5;		\
    372 	MOVD	$stack-MAXSIZE(SP), R6;		\
    373 loopArgs: /* copy 256 bytes at a time */	\
    374 	CMP	R5, $256;			\
    375 	BLT	tailArgs;			\
    376 	SUB	$256, R5;			\
    377 	MVC	$256, 0(R4), 0(R6);		\
    378 	MOVD	$256(R4), R4;			\
    379 	MOVD	$256(R6), R6;			\
    380 	BR	loopArgs;			\
    381 tailArgs: /* copy remaining bytes (fewer than 256) */		\
    382 	CMP	R5, $0;				\
    383 	BEQ	callFunction;			\
    384 	SUB	$1, R5;	/* R5 = remaining - 1 for the EXRL below */			\
    385 	EXRL	$callfnMVC<>(SB), R5;	/* run the MVC at callfnMVC, modified by R5 */		\
    386 callFunction:					\
    387 	MOVD	f+8(FP), R12;	/* R12 = f */			\
    388 	MOVD	(R12), R8;	/* R8 = code pointer, first word of f */			\
    389 	PCDATA  $PCDATA_StackMapIndex, $0;	\
    390 	BL	(R8);				\
    391 	/* copy return values back: set up callRet's register arguments */		\
    392 	MOVD	argtype+0(FP), R7;		\
    393 	MOVD	arg+16(FP), R6;			\
    394 	MOVWZ	n+24(FP), R5;			\
    395 	MOVD	$stack-MAXSIZE(SP), R4;		\
    396 	MOVWZ	retoffset+28(FP), R1;		\
    397 	ADD	R1, R4;	/* R4 = stack copy + retoffset */				\
    398 	ADD	R1, R6;	/* R6 = caller's arg + retoffset */				\
    399 	SUB	R1, R5;	/* R5 = n - retoffset */				\
    400 	BL	callRet<>(SB);			\
    401 	RET
    402 
    403 // callRet copies return values back at the end of call*. This is a
    404 // separate function so it can allocate stack space for the arguments
    405 // to reflectcallmove. It does not follow the Go ABI; it expects its
    406 // arguments in registers.
        // As set up by CALLFN: R7 = argtype, R6 = arg + retoffset,
        // R4 = stack copy + retoffset, R5 = n - retoffset.
    407 TEXT callRet<>(SB), NOSPLIT, $32-0
    408 	MOVD	R7, 8(R15)
    409 	MOVD	R6, 16(R15)
    410 	MOVD	R4, 24(R15)
    411 	MOVD	R5, 32(R15)
    412 	BL	runtimereflectcallmove(SB)
    413 	RET
    414 
    415 CALLFN(call32, 32)	// one fixed-frame call function per size, doubling from 32 bytes up to 1073741824
    416 CALLFN(call64, 64)
    417 CALLFN(call128, 128)
    418 CALLFN(call256, 256)
    419 CALLFN(call512, 512)
    420 CALLFN(call1024, 1024)
    421 CALLFN(call2048, 2048)
    422 CALLFN(call4096, 4096)
    423 CALLFN(call8192, 8192)
    424 CALLFN(call16384, 16384)
    425 CALLFN(call32768, 32768)
    426 CALLFN(call65536, 65536)
    427 CALLFN(call131072, 131072)
    428 CALLFN(call262144, 262144)
    429 CALLFN(call524288, 524288)
    430 CALLFN(call1048576, 1048576)
    431 CALLFN(call2097152, 2097152)
    432 CALLFN(call4194304, 4194304)
    433 CALLFN(call8388608, 8388608)
    434 CALLFN(call16777216, 16777216)
    435 CALLFN(call33554432, 33554432)
    436 CALLFN(call67108864, 67108864)
    437 CALLFN(call134217728, 134217728)
    438 CALLFN(call268435456, 268435456)
    439 CALLFN(call536870912, 536870912)
    440 CALLFN(call1073741824, 1073741824)
    441 
    442 // Not a function: target for EXRL (execute relative long) instruction.
        // CALLFN executes this single MVC through EXRL with R5 to copy the tail of
        // the argument block from 0(R4) to 0(R6).
    443 TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
    444 	MVC	$1, 0(R4), 0(R6)
    445 
    446 TEXT runtimeprocyield(SB),NOSPLIT,$0-0
        	// No yield or pause instruction is issued here: returns immediately.
    447 	RET
    448 
    449 // void jmpdefer(fv, sp);
    450 // called from deferreturn.
    451 // 1. grab stored LR for caller
    452 // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
    453 // 3. BR to fn
    454 TEXT runtimejmpdefer(SB),NOSPLIT|NOFRAME,$0-16
    455 	MOVD	0(R15), R1	// step 1: stored LR
    456 	SUB	$6, R1, LR	// step 2: LR = stored LR - 6
    457 
    458 	MOVD	fv+0(FP), R12	// R12 = fv, the context for the call
    459 	MOVD	argp+8(FP), R15
    460 	SUB	$8, R15	// SP = argp - 8
    461 	MOVD	0(R12), R3	// R3 = code pointer, first word of fv
    462 	BR	(R3)	// step 3
    463 
    464 // Save state of caller into g->sched. Smashes R1.
        // Stores LR as pc and R15 as sp, zeroes lr and ret, and calls badctxt if
        // g->sched.ctxt is not zero.
    465 TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
    466 	MOVD	LR, (g_sched+gobuf_pc)(g)
    467 	MOVD	R15, (g_sched+gobuf_sp)(g)
    468 	MOVD	$0, (g_sched+gobuf_lr)(g)
    469 	MOVD	$0, (g_sched+gobuf_ret)(g)
    470 	// Assert ctxt is zero. See func save.
    471 	MOVD	(g_sched+gobuf_ctxt)(g), R1
    472 	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    473 	BL	runtimebadctxt(SB)
    474 	RET
    475 
    476 // func asmcgocall(fn, arg unsafe.Pointer) int32
    477 // Call fn(arg) on the scheduler stack,
    478 // aligned appropriately for the gcc ABI.
    479 // See cgocall.go for more details.
    480 TEXT asmcgocall(SB),NOSPLIT,$0-20
    481 	// R3 = fn; R4 = arg; R2 = saved stack pointer; R5 = calling g; R6 = temp; R15 = stack pointer
    482 	// (fn later receives arg in R2, and its result is read back from R2)
    483 	MOVD	fn+0(FP), R3
    484 	MOVD	arg+8(FP), R4
    485 
    486 	MOVD	R15, R2		// save original stack pointer
    487 	MOVD	g, R5
    488 
    489 	// Figure out if we need to switch to m->g0 stack.
    490 	// We get called to create new OS threads too, and those
    491 	// come in on the m->g0 stack already.
    492 	MOVD	g_m(g), R6
    493 	MOVD	m_g0(R6), R6
    494 	CMPBEQ	R6, g, g0
    495 	BL	gosave<>(SB)
    496 	MOVD	R6, g
    497 	BL	runtimesave_g(SB)
    498 	MOVD	(g_sched+gobuf_sp)(g), R15
    499 
    500 	// Now on a scheduling stack (a pthread-created stack).
    501 g0:
    502 	// Save room for two of our pointers, plus 160 bytes of callee
    503 	// save area that lives on the caller stack.
    504 	SUB	$176, R15
    505 	MOVD	$~7, R6
    506 	AND	R6, R15                 // 8-byte alignment for gcc ABI
    507 	MOVD	R5, 168(R15)             // save old g on stack
    508 	MOVD	(g_stack+stack_hi)(R5), R5
    509 	SUB	R2, R5
    510 	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
    511 	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
    512 	MOVD	R4, R2                  // arg in R2
    513 	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
    514 
    515 	XOR	R0, R0                  // set R0 back to 0.
    516 	// Restore g, stack pointer.
        	// SP is rebuilt as old g's stack.hi minus the depth saved at 160(R15).
    517 	MOVD	168(R15), g
    518 	BL	runtimesave_g(SB)
    519 	MOVD	(g_stack+stack_hi)(g), R5
    520 	MOVD	160(R15), R6
    521 	SUB	R6, R5
    522 	MOVD	R5, R15
    523 
    524 	MOVW	R2, ret+16(FP)	// 32-bit result left in R2 by fn
    525 	RET
    526 
    527 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
    528 // Turn the fn into a Go func (by taking its address) and call
    529 // cgocallback_gofunc.
        // The four outgoing arguments are written to 8, 16, 24 and 32(R15).
    530 TEXT runtimecgocallback(SB),NOSPLIT,$32-32
    531 	MOVD	$fn+0(FP), R3	// address of the fn argument slot, not its value
    532 	MOVD	R3, 8(R15)
    533 	MOVD	frame+8(FP), R3
    534 	MOVD	R3, 16(R15)
    535 	MOVD	framesize+16(FP), R3
    536 	MOVD	R3, 24(R15)
    537 	MOVD	ctxt+24(FP), R3
    538 	MOVD	R3, 32(R15)
    539 	MOVD	$runtimecgocallback_gofunc(SB), R3
    540 	BL	(R3)	// indirect call through R3
    541 	RET
    542 
    543 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
    544 // See cgocall.go for more details.
        // Two 8-byte locals are used: savedm-8(SP) holds the m found on entry (zero
        // if one had to be borrowed with needm) and savedsp-16(SP) holds the
        // previous m->g0->sched.sp.
    545 TEXT cgocallback_gofunc(SB),NOSPLIT,$16-32
    546 	NO_LOCAL_POINTERS
    547 
    548 	// Load m and g from thread-local storage.
    549 	MOVB	runtimeiscgo(SB), R3
    550 	CMPBEQ	R3, $0, nocgo
    551 	BL	runtimeload_g(SB)
    552 
    553 nocgo:
    554 	// If g is nil, Go did not create the current thread.
    555 	// Call needm to obtain one for temporary use.
    556 	// In this case, we're running on the thread stack, so there's
    557 	// lots of space, but the linker doesn't know. Hide the call from
    558 	// the linker analysis by using an indirect call.
    559 	CMPBEQ	g, $0, needm
    560 
    561 	MOVD	g_m(g), R8
    562 	MOVD	R8, savedm-8(SP)
    563 	BR	havem
    564 
    565 needm:
    566 	MOVD	g, savedm-8(SP) // g is zero, so is m.
    567 	MOVD	$runtimeneedm(SB), R3
    568 	BL	(R3)
    569 
    570 	// Set m->sched.sp = SP, so that if a panic happens
    571 	// during the function we are about to execute, it will
    572 	// have a valid SP to run on the g0 stack.
    573 	// The next few lines (after the havem label)
    574 	// will save this SP onto the stack and then write
    575 	// the same SP back to m->sched.sp. That seems redundant,
    576 	// but if an unrecovered panic happens, unwindm will
    577 	// restore the g->sched.sp from the stack location
    578 	// and then systemstack will try to use it. If we don't set it here,
    579 	// that restored SP will be uninitialized (typically 0) and
    580 	// will not be usable.
    581 	MOVD	g_m(g), R8
    582 	MOVD	m_g0(R8), R3
    583 	MOVD	R15, (g_sched+gobuf_sp)(R3)
    584 
    585 havem:
    586 	// Now there's a valid m, and we're running on its m->g0.
    587 	// Save current m->g0->sched.sp on stack and then set it to SP.
    588 	// Save current sp in m->g0->sched.sp in preparation for
    589 	// switch back to m->curg stack.
    590 	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
        	// NOTE(review): R1 is not the stack pointer in this file (R15 is);
        	// presumably 8(R15) is meant - confirm.
    591 	MOVD	m_g0(R8), R3
    592 	MOVD	(g_sched+gobuf_sp)(R3), R4
    593 	MOVD	R4, savedsp-16(SP)
    594 	MOVD	R15, (g_sched+gobuf_sp)(R3)
    595 
    596 	// Switch to m->curg stack and call runtime.cgocallbackg.
    597 	// Because we are taking over the execution of m->curg
    598 	// but *not* resuming what had been running, we need to
    599 	// save that information (m->curg->sched) so we can restore it.
    600 	// We can restore m->curg->sched.sp easily, because calling
    601 	// runtime.cgocallbackg leaves SP unchanged upon return.
    602 	// To save m->curg->sched.pc, we push it onto the stack.
    603 	// This has the added benefit that it looks to the traceback
    604 	// routine like cgocallbackg is going to return to that
    605 	// PC (because the frame we allocate below has the same
    606 	// size as cgocallback_gofunc's frame declared above)
    607 	// so that the traceback will seamlessly trace back into
    608 	// the earlier calls.
    609 	//
    610 	// In the new goroutine, -8(SP) is unused (where SP refers to
    611 	// m->curg's SP while we're setting it up, before we've adjusted it).
    612 	MOVD	m_curg(R8), g
    613 	BL	runtimesave_g(SB)
    614 	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
    615 	MOVD	(g_sched+gobuf_pc)(g), R5
    616 	MOVD	R5, -24(R4)	// saved pc goes at the bottom of the new 24-byte frame
    617 	MOVD	ctxt+24(FP), R5
    618 	MOVD	R5, -16(R4)	// ctxt is the argument passed to cgocallbackg
    619 	MOVD	$-24(R4), R15	// SP = curg's saved sp - 24
    620 	BL	runtimecgocallbackg(SB)
    621 
    622 	// Restore g->sched (== m->curg->sched) from saved values.
    623 	MOVD	0(R15), R5
    624 	MOVD	R5, (g_sched+gobuf_pc)(g)
    625 	MOVD	$24(R15), R4
    626 	MOVD	R4, (g_sched+gobuf_sp)(g)
    627 
    628 	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    629 	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    630 	// so we do not have to restore it.)
    631 	MOVD	g_m(g), R8
    632 	MOVD	m_g0(R8), g
    633 	BL	runtimesave_g(SB)
    634 	MOVD	(g_sched+gobuf_sp)(g), R15
    635 	MOVD	savedsp-16(SP), R4
    636 	MOVD	R4, (g_sched+gobuf_sp)(g)
    637 
    638 	// If the m on entry was nil, we called needm above to borrow an m
    639 	// for the duration of the call. Since the call is over, return it with dropm.
    640 	MOVD	savedm-8(SP), R6
    641 	CMPBNE	R6, $0, droppedm	// an m existed on entry: nothing to drop
    642 	MOVD	$runtimedropm(SB), R3
    643 	BL	(R3)
    644 droppedm:
    645 
    646 	// Done!
    647 	RET
    648 
    649 // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and calls save_g.
    650 TEXT runtimesetg(SB), NOSPLIT, $0-8
    651 	MOVD	gg+0(FP), g
    652 	// This only happens if iscgo, so jump straight to save_g
    653 	BL	runtimesave_g(SB)
    654 	RET
    655 
    656 // void setg_gcc(G*); set g in C TLS.
    657 // Must obey the gcc calling convention.
        // The new G arrives in R2. R1, R3, R4 and R5 hold the saved LR, R10, g and
        // R11 across the call to save_g; all four are restored before returning,
        // so the g register is unchanged on return.
    658 TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
    659 	// The standard prologue clobbers LR (R14), which is callee-save in
    660 	// the C ABI, so we have to use NOFRAME and save LR ourselves.
    661 	MOVD	LR, R1
    662 	// Also save g, R10, and R11 since they're callee-save in C ABI
    663 	MOVD	R10, R3
    664 	MOVD	g, R4
    665 	MOVD	R11, R5
    666 
    667 	MOVD	R2, g
    668 	BL	runtimesave_g(SB)
    669 
    670 	MOVD	R5, R11
    671 	MOVD	R4, g
    672 	MOVD	R3, R10
    673 	MOVD	R1, LR
    674 	RET
    675 
    676 TEXT runtimegetcallerpc(SB),NOSPLIT,$8-16
        	// Returns the LR value saved by the caller. If that value equals
        	// stackBarrierPC, the original return PC is obtained from nextBarrierPC.
    677 	MOVD	16(R15), R3		// LR saved by caller
    678 	MOVD	runtimestackBarrierPC(SB), R4
    679 	CMPBNE	R3, R4, nobar
    680 	// Get original return PC.
    681 	BL	runtimenextBarrierPC(SB)
    682 	MOVD	8(R15), R3	// result of nextBarrierPC, read from 8(R15)
    683 nobar:
    684 	MOVD	R3, ret+8(FP)
    685 	RET
    686 
    687 TEXT runtimesetcallerpc(SB),NOSPLIT,$8-16
        	// Overwrites the caller's saved LR with pc. If the saved LR currently
        	// equals stackBarrierPC, pc is handed to setNextBarrierPC instead and
        	// the saved LR is left alone.
    688 	MOVD	pc+8(FP), R3
    689 	MOVD	16(R15), R4
    690 	MOVD	runtimestackBarrierPC(SB), R5
    691 	CMPBEQ	R4, R5, setbar
    692 	MOVD	R3, 16(R15)		// set LR in caller
    693 	RET
    694 setbar:
    695 	// Set the stack barrier return PC.
    696 	MOVD	R3, 8(R15)	// argument for setNextBarrierPC
    697 	BL	runtimesetNextBarrierPC(SB)
    698 	RET
    699 
    700 TEXT runtimeabort(SB),NOSPLIT|NOFRAME,$0-0
        	// Loads from the address held in R0 (kept at zero in this file),
        	// presumably to crash the process - TODO confirm. UNDEF follows in case
        	// the load does not stop execution.
    701 	MOVW	(R0), R0
    702 	UNDEF
    703 
    704 // int64 runtimecputicks(void)
    705 TEXT runtimecputicks(SB),NOSPLIT,$0-8
    706 	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
    707 	// This means that since about 1972 the msb has been set, making the
    708 	// result of a call to STORE CLOCK (stck) a negative number.
    709 	// We clear the msb to make it positive.
    710 	STCK	ret+0(FP)      // serialises before and after call
    711 	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
    712 	SLD	$1, R3	// shift left then right by one bit: clears the msb
    713 	SRD	$1, R3
    714 	MOVD	R3, ret+0(FP)
    715 	RET
    716 
    717 // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    718 // redirects to memhash(p, h, size) using the size
    719 // stored in the closure.
    720 TEXT runtimememhash_varlen(SB),NOSPLIT,$40-24
    721 	GO_ARGS
    722 	NO_LOCAL_POINTERS
    723 	MOVD	p+0(FP), R3
    724 	MOVD	h+8(FP), R4
    725 	MOVD	8(R12), R5	// size, read from offset 8 of the closure in R12
    726 	MOVD	R3, 8(R15)
    727 	MOVD	R4, 16(R15)
    728 	MOVD	R5, 24(R15)
    729 	BL	runtimememhash(SB)
    730 	MOVD	32(R15), R3	// memhash's result slot follows its three arguments
    731 	MOVD	R3, ret+16(FP)
    732 	RET
    733 
    734 // AES hashing not implemented for s390x
        // The stub's only instruction loads from the address in R0 into R15,
        // presumably to crash if it is ever called - TODO confirm.
    735 TEXT runtimeaeshash(SB),NOSPLIT|NOFRAME,$0-0
    736 	MOVW	(R0), R15
    737 TEXT runtimeaeshash32(SB),NOSPLIT|NOFRAME,$0-0
        	// Unimplemented stub: loads from the address in R0 into R15, presumably
        	// to crash if called - TODO confirm.
    738 	MOVW	(R0), R15
    739 TEXT runtimeaeshash64(SB),NOSPLIT|NOFRAME,$0-0
        	// Unimplemented stub: loads from the address in R0 into R15, presumably
        	// to crash if called - TODO confirm.
    740 	MOVW	(R0), R15
    741 TEXT runtimeaeshashstr(SB),NOSPLIT|NOFRAME,$0-0
        	// Unimplemented stub: loads from the address in R0 into R15, presumably
        	// to crash if called - TODO confirm.
    742 	MOVW	(R0), R15
    743 
    744 // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Loads memeqbody's register arguments (R3 = a, R5 = b, R6 = size,
        // R7 = &ret) and branches to it; memeqbody writes the result and returns.
    745 TEXT runtimememequal(SB),NOSPLIT|NOFRAME,$0-25
    746 	MOVD	a+0(FP), R3
    747 	MOVD	b+8(FP), R5
    748 	MOVD	size+16(FP), R6
    749 	LA	ret+24(FP), R7
    750 	BR	runtimememeqbody(SB)
    751 
    752 // memequal_varlen(a, b unsafe.Pointer) bool
        // Same as memequal, except that the length comes from the closure in R12.
    753 TEXT runtimememequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    754 	MOVD	a+0(FP), R3
    755 	MOVD	b+8(FP), R5
    756 	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
    757 	LA	ret+16(FP), R7
    758 	BR	runtimememeqbody(SB)
    759 
    760 // eqstring tests whether two strings are equal.
    761 // The compiler guarantees that strings passed
    762 // to eqstring have equal length.
    763 // See runtime_test.go:eqstring_generic for
    764 // equivalent Go code.
        // Only s1's length is loaded; it is used as the length for both strings.
    765 TEXT runtimeeqstring(SB),NOSPLIT|NOFRAME,$0-33
    766 	MOVD	s1_base+0(FP), R3
    767 	MOVD	s1_len+8(FP), R6
    768 	MOVD	s2_base+16(FP), R5
    769 	LA	ret+32(FP), R7
    770 	BR	runtimememeqbody(SB)
    771 
    772 TEXT bytesEqual(SB),NOSPLIT|NOFRAME,$0-49
        	// Compares two byte slices: stores 0 immediately if the lengths differ,
        	// otherwise branches to memeqbody with R3 = a, R5 = b, R6 = length and
        	// R7 = &ret.
    773 	MOVD	a_len+8(FP), R2
    774 	MOVD	b_len+32(FP), R6
    775 	MOVD	a+0(FP), R3
    776 	MOVD	b+24(FP), R5
    777 	LA	ret+48(FP), R7
    778 	CMPBNE	R2, R6, notequal
    779 	BR	runtimememeqbody(SB)
    780 notequal:
    781 	MOVB	$0, ret+48(FP)
    782 	RET
    783 
    784 // input:
    785 //   R3 = a
    786 //   R5 = b
    787 //   R6 = len
    788 //   R7 = address of output byte (stores 0 or 1 here)
    789 //   a and b have the same length
        // Strategy: equal pointers or zero length report equal at once; lengths of
        // 256 or more are compared 256 bytes at a time with CLC; 32..255 bytes use
        // one CLC executed through EXRL; fewer than 32 bytes use 8-, 4- and 1-byte
        // loads. R2, R8 and R9 are used as scratch.
    790 TEXT runtimememeqbody(SB),NOSPLIT|NOFRAME,$0-0
    791 	CMPBEQ	R3, R5, equal
    792 loop:
    793 	CMPBEQ	R6, $0, equal
    794 	CMPBLT	R6, $32, tiny
    795 	CMP	R6, $256
    796 	BLT	tail
    797 	CLC	$256, 0(R3), 0(R5)
    798 	BNE	notequal
    799 	SUB	$256, R6
    800 	LA	256(R3), R3
    801 	LA	256(R5), R5
    802 	BR	loop
    803 tail:
    804 	SUB	$1, R6, R8	// R8 = len - 1 for the EXRL below
    805 	EXRL	$runtimememeqbodyclc(SB), R8	// run the CLC at memeqbodyclc, modified by R8
    806 	BEQ	equal
    807 notequal:
    808 	MOVB	$0, 0(R7)
    809 	RET
    810 equal:
    811 	MOVB	$1, 0(R7)
    812 	RET
    813 tiny:
        	// R2 = offset of the next unchecked byte, R6 = bytes still to compare
    814 	MOVD	$0, R2
    815 	CMPBLT	R6, $16, lt16
    816 	MOVD	0(R3), R8
    817 	MOVD	0(R5), R9
    818 	CMPBNE	R8, R9, notequal
    819 	MOVD	8(R3), R8
    820 	MOVD	8(R5), R9
    821 	CMPBNE	R8, R9, notequal
    822 	LA	16(R2), R2
    823 	SUB	$16, R6
    824 lt16:
    825 	CMPBLT	R6, $8, lt8
    826 	MOVD	0(R3)(R2*1), R8
    827 	MOVD	0(R5)(R2*1), R9
    828 	CMPBNE	R8, R9, notequal
    829 	LA	8(R2), R2
    830 	SUB	$8, R6
    831 lt8:
    832 	CMPBLT	R6, $4, lt4
    833 	MOVWZ	0(R3)(R2*1), R8
    834 	MOVWZ	0(R5)(R2*1), R9
    835 	CMPBNE	R8, R9, notequal
    836 	LA	4(R2), R2
    837 	SUB	$4, R6
    838 lt4:
        // CHECK(n): equal if exactly n bytes remain, otherwise compare byte n.
    839 #define CHECK(n) \
    840 	CMPBEQ	R6, $n, equal \
    841 	MOVB	n(R3)(R2*1), R8 \
    842 	MOVB	n(R5)(R2*1), R9 \
    843 	CMPBNE	R8, R9, notequal
    844 	CHECK(0)
    845 	CHECK(1)
    846 	CHECK(2)
    847 	CHECK(3)
    848 	BR	equal
    849 
    850 TEXT runtimememeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// memeqbody executes this CLC through EXRL with R8 (len - 1) to compare
        	// the tail at 0(R3) against 0(R5).
    851 	CLC	$1, 0(R3), 0(R5)
    852 	RET
    853 
    854 TEXT runtimefastrand(SB), NOSPLIT, $0-4
        	// Advances the 32-bit generator state in m.fastrand and returns the new
        	// state: the value is doubled and, if the 32-bit result is negative,
        	// XORed with 0x88888eef.
    855 	MOVD	g_m(g), R4
    856 	MOVWZ	m_fastrand(R4), R3
    857 	ADD	R3, R3
    858 	CMPW	R3, $0
    859 	BGE	2(PC)	// non-negative: skip the XOR
    860 	XOR	$0x88888eef, R3
    861 	MOVW	R3, m_fastrand(R4)
    862 	MOVW	R3, ret+0(FP)
    863 	RET
    864 
    865 TEXT bytesIndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Byte-slice entry point: loads indexbytebody's register arguments and
        	// branches to it.
    866 	MOVD	s+0(FP), R3     // s => R3
    867 	MOVD	s_len+8(FP), R4 // s_len => R4
    868 	MOVBZ	c+24(FP), R5    // c => R5
    869 	MOVD	$ret+32(FP), R2 // &ret => R2
    870 	BR	runtimeindexbytebody(SB)
    871 
    872 TEXT stringsIndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// String entry point: loads indexbytebody's register arguments and
        	// branches to it.
    873 	MOVD	s+0(FP), R3     // s => R3
    874 	MOVD	s_len+8(FP), R4 // s_len => R4
    875 	MOVBZ	c+16(FP), R5    // c => R5
    876 	MOVD	$ret+24(FP), R2 // &ret => R2
    877 	BR	runtimeindexbytebody(SB)
    878 
    879 // input:
    880 // R3: s
    881 // R4: s_len
    882 // R5: c -- byte sought
    883 // R2: &ret -- address to put index into
        // Stores the index of the first occurrence of c, or -1 if there is none.
        // Inputs shorter than 16 bytes are scanned byte by byte; longer inputs use
        // the vector path when cpu.facilities.hasVX is non-zero and SRST otherwise.
        // R6 keeps the base address and R8 the end address.
    884 TEXT runtimeindexbytebody(SB),NOSPLIT|NOFRAME,$0
    885 	CMPBEQ	R4, $0, notfound
    886 	MOVD	R3, R6          // store base for later
    887 	ADD	R3, R4, R8      // the address after the end of the string
    888 	//if the length is small, use loop; otherwise, use vector or srst search
    889 	CMPBGE	R4, $16, large
    890 
    891 residual:
    892 	CMPBEQ	R3, R8, notfound
    893 	MOVBZ	0(R3), R7
    894 	LA	1(R3), R3
    895 	CMPBNE	R7, R5, residual
    896 
    897 found:
        	// R3 points one past the match, so index = R3 - base - 1
    898 	SUB	R6, R3
    899 	SUB	$1, R3
    900 	MOVD	R3, 0(R2)
    901 	RET
    902 
    903 notfound:
    904 	MOVD	$-1, 0(R2)
    905 	RET
    906 
    907 large:
    908 	MOVBZ	cpu+facilities_hasVX(SB), R1
    909 	CMPBNE	R1, $0, vectorimpl
    910 
    911 srstimpl:                       // no vector facility
    912 	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
    913 srstloop:
    914 	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
    915 	BVS	srstloop        // interrupted - continue
    916 	BGT	notfoundr0
    917 foundr0:
    918 	XOR	R0, R0          // reset R0
    919 	SUB	R6, R8          // remove base
    920 	MOVD	R8, 0(R2)
    921 	RET
    922 notfoundr0:
    923 	XOR	R0, R0          // reset R0
    924 	MOVD	$-1, 0(R2)
    925 	RET
    926 
    927 vectorimpl:
    928 	//if the address is not 16byte aligned, use loop for the header
    929 	MOVD	R3, R8
    930 	AND	$15, R8
    931 	CMPBGT	R8, $0, notaligned
    932 
    933 aligned:
    934 	ADD	R6, R4, R8	// R8 = base + len, the end address again
    935 	MOVD	R8, R7
    936 	AND	$-16, R7	// R7 = end address rounded down to a multiple of 16
    937 	// replicate c across V17
    938 	VLVGB	$0, R5, V19
    939 	VREPB	$0, V19, V17
    940 
    941 vectorloop:
    942 	CMPBGE	R3, R7, residual	// no full 16-byte chunk left: finish byte by byte
    943 	VL	0(R3), V16    // load string to be searched into V16
    944 	ADD	$16, R3
    945 	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
    946 	BVS	vectorloop
    947 
    948 	// when vector search found c in the string
    949 	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
    950 	SUB	$16, R3	// undo the advance: R3 = start of the matching chunk
    951 	SUB	R6, R3	// R3 = chunk offset from base
    952 	ADD	R3, R7	// R7 = chunk offset + index within the chunk
    953 	MOVD	R7, 0(R2)
    954 	RET
    955 
    956 notaligned:
        	// R8 = next 16-byte boundary above R3; scan bytes up to it
    957 	MOVD	R3, R8
    958 	AND	$-16, R8
    959 	ADD     $16, R8
    960 notalignedloop:
    961 	CMPBEQ	R3, R8, aligned
    962 	MOVBZ	0(R3), R7
    963 	LA	1(R3), R3
    964 	CMPBNE	R7, R5, notalignedloop
    965 	BR	found
    966 
    967 TEXT runtimereturn0(SB), NOSPLIT, $0
        	// Stub that sets R3 to 0 and returns; it takes no arguments
        	// from the frame and writes no result to it.
    968 	MOVW	$0, R3
    969 	RET
    970 
    970 // _cgo_topofstack is called from cgo wrappers and leaves g->m->curg.stack.hi in R2.
    971 // It is entered from C code, so it must obey the gcc calling convention.
    972 TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
    973 	// LR (R14), g (R13), R11 and R10 are callee-save in the C ABI: park them in R4, R1, R5, R3
    974 	MOVD	LR, R4
    975 	MOVD	g, R1
    976 	MOVD	R11, R5
    977 	MOVD	R10, R3
    978 
    979 	BL	runtimeload_g(SB)	// sets g; clobbers g (R13), R10, R11 (and LR via BL)
    980 	MOVD	g_m(g), R2		// R2 = g->m
    981 	MOVD	m_curg(R2), R2		// R2 = g->m->curg
    982 	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = g->m->curg.stack.hi
    983 
    984 	MOVD	R5, R11			// put the caller's registers back
    985 	MOVD	R3, R10
    986 	MOVD	R1, g
    987 	MOVD	R4, LR
    988 	RET
    990 
    991 // The top-most function running on a goroutine
    992 // returns to goexit+PCQuantum.
        // The body is: a 2-byte nop, a call to goexit1, and a trailing 2-byte nop.
        // NOTE(review): the leading nop presumably makes goexit+PCQuantum land on
        // the BL (i.e. PCQuantum is 2 here) -- confirm against the arch constants.
    993 TEXT runtimegoexit(SB),NOSPLIT|NOFRAME,$0-0
    994 	BYTE $0x07; BYTE $0x00; // 2-byte nop
    995 	BL	runtimegoexit1(SB)	// does not return
    996 	// traceback from goexit1 must hit code range of goexit
        	// (the return address of the BL above points at this nop)
    997 	BYTE $0x07; BYTE $0x00; // 2-byte nop
    998 
    999 TEXT runtimeprefetcht0(SB),NOSPLIT,$0-8
        	// No-op in this file: the 8-byte argument is ignored.
   1000 	RET
   1001 
   1002 TEXT runtimeprefetcht1(SB),NOSPLIT,$0-8
        	// No-op in this file: the 8-byte argument is ignored.
   1003 	RET
   1004 
   1005 TEXT runtimeprefetcht2(SB),NOSPLIT,$0-8
        	// No-op in this file: the 8-byte argument is ignored.
   1006 	RET
   1007 
   1008 TEXT runtimeprefetchnta(SB),NOSPLIT,$0-8
        	// No-op in this file: the 8-byte argument is ignored.
   1009 	RET
   1010 
   1011 TEXT runtimesigreturn(SB),NOSPLIT,$0-0
        	// Empty body: takes no arguments and returns immediately.
   1012 	RET
   1013 
   1014 TEXT publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
        	// Executes a single SYNC instruction and returns.
        	// NOTE(review): SYNC is presumably the assembler's serialization /
        	// memory-barrier instruction -- confirm its exact ordering guarantee.
   1015 	SYNC
   1016 	RET
   1017 
   1018 TEXT runtimecmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Sets up cmpbody's register inputs from the two string arguments
        	// and tail-branches to it; cmpbody stores -1/0/1 through R7.
   1019 	LA	ret+32(FP), R7		// R7 = &ret
   1020 	MOVD	s2_len+24(FP), R6	// R6 = blen
   1021 	MOVD	s2_base+16(FP), R5	// R5 = b
   1022 	MOVD	s1_len+8(FP), R4	// R4 = alen
   1023 	MOVD	s1_base+0(FP), R3	// R3 = a
   1024 	BR	runtimecmpbody(SB)
   1025 
   1026 TEXT bytesCompare(SB),NOSPLIT|NOFRAME,$0-56
        	// Sets up cmpbody's register inputs from the two arguments (each
        	// occupying 24 bytes of the frame) and tail-branches to it;
        	// cmpbody stores -1/0/1 through R7.
   1027 	LA	res+48(FP), R7		// R7 = &res
   1028 	MOVD	s2+32(FP), R6		// R6 = blen (second word of s2)
   1029 	MOVD	s2+24(FP), R5		// R5 = b    (first word of s2)
   1030 	MOVD	s1+8(FP), R4		// R4 = alen (second word of s1)
   1031 	MOVD	s1+0(FP), R3		// R3 = a    (first word of s1)
   1032 	BR	runtimecmpbody(SB)
   1033 
   1034 // input:
   1035 //   R3 = a
   1036 //   R4 = alen
   1037 //   R5 = b
   1038 //   R6 = blen
   1039 //   R7 = address of output word (stores -1/0/1 here)
        //
        // Compares the first min(alen, blen) bytes of a and b with CLC, 256 bytes
        // at a time, and falls back to comparing the lengths when that common
        // prefix is equal (or when a and b are the same address).
        // R3 and R5 are advanced past each 256-byte chunk that compares equal, so
        // every iteration and the final partial chunk look at fresh data.
        // Clobbers R3, R5 and R8.
   1040 TEXT runtimecmpbody(SB),NOSPLIT|NOFRAME,$0-0
   1041 	CMPBEQ	R3, R5, cmplengths	// same base address: only the lengths can differ
   1042 	MOVD	R4, R8
   1043 	CMPBLE	R4, R6, amin
   1044 	MOVD	R6, R8
   1045 amin:					// R8 = min(alen, blen) = bytes left to compare
   1046 	CMPBEQ	R8, $0, cmplengths	// nothing in common to compare
   1047 	CMP	R8, $256
   1048 	BLE	tail
   1049 loop:					// invariant: R8 > 256 bytes remain at R3/R5
   1050 	CLC	$256, 0(R3), 0(R5)
   1051 	BGT	gt
   1052 	BLT	lt
   1053 	SUB	$256, R8
        	MOVD	$256(R3), R3		// step both operands past the chunk just compared
        	MOVD	$256(R5), R5
   1054 	CMP	R8, $256
   1055 	BGT	loop
   1056 tail:					// 1 <= R8 <= 256 bytes remain
   1057 	SUB	$1, R8			// the executed CLC takes its length as (count - 1)
   1058 	EXRL	$runtimecmpbodyclc(SB), R8
   1059 	BGT	gt
   1060 	BLT	lt
   1061 cmplengths:				// common prefix equal: the shorter operand sorts first
   1062 	CMP	R4, R6
   1063 	BEQ	eq
   1064 	BLT	lt
   1065 gt:
   1066 	MOVD	$1, 0(R7)
   1067 	RET
   1068 lt:
   1069 	MOVD	$-1, 0(R7)
   1070 	RET
   1071 eq:
   1072 	MOVD	$0, 0(R7)
   1073 	RET
   1074 
   1075 TEXT runtimecmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// Template instruction: cmpbody runs this CLC through EXRL with R8
        	// holding (byte count - 1), so the $1 length here is only a
        	// placeholder. Compares the bytes at 0(R3) with those at 0(R5).
   1076 	CLC	$1, 0(R3), 0(R5)
   1077 	RET
   1078 
   1079 // func supportsVX() bool
        // Returns the runtime's hasVX facility flag as the 1-byte result.
        // NOTE(review): R0 is used as scratch and is left holding the flag on
        // return, while indexbytebody in this file treats R0 as expected-zero --
        // confirm whether callers rely on R0 == 0.
   1080 TEXT stringssupportsVX(SB),NOSPLIT,$0-1
   1081 	MOVBZ	runtimecpu+facilities_hasVX(SB), R0	// R0 = hasVX flag byte
   1082 	MOVB	R0, ret+0(FP)
   1083 	RET
   1084 
   1085 // func supportsVX() bool
        // Returns the runtime's hasVX facility flag as the 1-byte result.
        // NOTE(review): R0 is used as scratch and is left holding the flag on
        // return, while indexbytebody in this file treats R0 as expected-zero --
        // confirm whether callers rely on R0 == 0.
   1086 TEXT bytessupportsVX(SB),NOSPLIT,$0-1
   1087 	MOVBZ	runtimecpu+facilities_hasVX(SB), R0	// R0 = hasVX flag byte
   1088 	MOVB	R0, ret+0(FP)
   1089 	RET
   1090 
   1091 // func indexShortStr(s, sep string) int
   1092 // The vx facility must be available; the caller is responsible for checking.
   1093 TEXT stringsindexShortStr(SB),NOSPLIT|NOFRAME,$0-40
   1094 	MOVD	$ret+32(FP), R5    // R5=&ret
   1095 	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
   1096 	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
   1097 	BR	runtimeindexShortStr(SB) // tail call: the shared body stores the result through R5
   1098 
   1099 // func indexShortStr(s, sep []byte) int
   1100 // The vx facility must be available; the caller is responsible for checking.
   1101 TEXT bytesindexShortStr(SB),NOSPLIT|NOFRAME,$0-56
   1102 	MOVD	$ret+48(FP), R5    // R5=&ret
   1103 	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
   1104 	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
   1105 	BR	runtimeindexShortStr(SB) // tail call: the shared body stores the result through R5
   1106 
   1107 // s: string we are searching
   1108 // sep: string to search for
   1109 // R1=&s[0], R2=len(s)
   1110 // R3=&sep[0], R4=len(sep)
   1111 // R5=&ret (int)
   1112 // Caller must confirm availability of vx facility before calling.
        //
        // Stores at (R5) the offset in s of the first occurrence of sep, or -1 if
        // there is none. -1 is also stored when len(sep) == 0 or len(sep) > len(s).
        // Dedicated 16-positions-per-iteration loops handle len(sep) of 2, 3 and 4;
        // other lengths up to 16 use a generic one-position-at-a-time loop, and
        // lengths 17-64 use 2, 3 or 4 vector registers per candidate position.
        // len(sep) > 64 is not implemented (see index65plus).
        // Throughout: R7 = current candidate address in s, R2 = last address at
        // which a match can start, R4 = len(sep)-1, V0 = first 16 bytes of sep.
        // NOTE(review): several paths load nonzero values into R0 and never clear
        // it, while indexbytebody in this file treats R0 as expected-zero --
        // confirm whether callers rely on R0 == 0.
   1113 TEXT runtimeindexShortStr(SB),NOSPLIT|NOFRAME,$0
   1114 	CMPBGT	R4, R2, notfound // sep longer than s
   1115 	ADD	R1, R2
   1116 	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
   1117 	CMPBEQ	R4, $0, notfound // empty sep is reported as not found
   1118 	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
   1119 	VLL	R4, (R3), V0 // contains first 16 bytes of sep
   1120 	MOVD	R1, R7 // R7 = search cursor, starts at &s[0]
   1121 index2plus:
   1122 	CMPBNE	R4, $1, index3plus // not len(sep)==2
   1123 	MOVD	$15(R7), R9
   1124 	CMPBGE	R9, R2, index2to16 // too little room for a 16-position block: generic loop
   1125 	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
   1126 	VONE	V16                // all ones: the pattern VFEEBS looks for in the match mask
   1127 	VREPH	$0, V0, V1         // replicate the 2-byte sep across V1
   1128 	CMPBGE	R9, R2, index2to16 // repeats the check above (R9 and R2 are unchanged since)
   1129 index2loop:
   1130 	VL	0(R7), V2          // 16 bytes, even indices
   1131 	VL	1(R7), V4          // 16 bytes, odd indices
   1132 	VCEQH	V1, V2, V5         // compare even indices
   1133 	VCEQH	V1, V4, V6         // compare odd indices
   1134 	VSEL	V5, V6, V31, V7    // merge even and odd indices
   1135 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1136 	BLT	foundV17
   1137 	MOVD	$16(R7), R7        // R7+=16
   1138 	ADD	$15, R7, R9
   1139 	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
   1140 	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish in the generic loop
   1141 	BR	notfound
   1142 
   1143 index3plus:
   1144 	CMPBNE	R4, $2, index4plus // not len(sep)==3
   1145 	ADD	$15, R7, R9
   1146 	CMPBGE	R9, R2, index2to16 // too little room for a 16-position block: generic loop
   1147 	MOVD	$1, R0             // VLL index: load 2 bytes
   1148 	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
   1149 	VONE	V16                // all ones: the pattern VFEEBS looks for in the match mask
   1150 	VREPH	$0, V0, V1         // replicate the first 2 bytes of sep across V1
   1151 	VREPB	$2, V0, V8         // replicate the third byte of sep across V8
   1152 index3loop:
   1153 	VL	(R7), V2           // load 16-bytes into V2
   1154 	VLL	R0, 16(R7), V3     // load 2-bytes into V3
   1155 	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
   1156 	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   1157 	VCEQH	V1, V2, V5         // compare 2-byte even indices
   1158 	VCEQH	V1, V4, V6         // compare 2-byte odd indices
   1159 	VCEQB	V8, V9, V10        // compare last bytes
   1160 	VSEL	V5, V6, V31, V7    // merge even and odd indices
   1161 	VN	V7, V10, V7        // AND indices with last byte
   1162 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1163 	BLT	foundV17
   1164 	MOVD	$16(R7), R7        // R7+=16
   1165 	ADD	$15, R7, R9
   1166 	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
   1167 	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish in the generic loop
   1168 	BR	notfound
   1169 
   1170 index4plus:
   1171 	CMPBNE	R4, $3, index5plus // not len(sep)==4
   1172 	ADD	$15, R7, R9
   1173 	CMPBGE	R9, R2, index2to16 // too little room for a 16-position block: generic loop
   1174 	MOVD	$2, R0             // VLL index: load 3 bytes
   1175 	VGBM	$0x8888, V29       // 0xff000000ff000000...
   1176 	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
   1177 	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
   1178 	VONE	V16                // all ones: the pattern VFEEBS looks for in the match mask
   1179 	VREPF	$0, V0, V1         // replicate the 4-byte sep across V1
   1180 index4loop:
   1181 	VL	(R7), V2           // load 16-bytes into V2
   1182 	VLL	R0, 16(R7), V3     // load 3-bytes into V3
   1183 	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
   1184 	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   1185 	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
   1186 	VCEQF	V1, V2, V5         // compare index 0, 4, ...
   1187 	VCEQF	V1, V4, V6         // compare index 1, 5, ...
   1188 	VCEQF	V1, V9, V11        // compare index 2, 6, ...
   1189 	VCEQF	V1, V10, V12       // compare index 3, 7, ...
   1190 	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
   1191 	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
   1192 	VSEL	V13, V14, V31, V7  // final merge
   1193 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1194 	BLT	foundV17
   1195 	MOVD	$16(R7), R7        // R7+=16
   1196 	ADD	$15, R7, R9
   1197 	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
   1198 	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish in the generic loop
   1199 	BR	notfound
   1200 
   1201 index5plus:
   1202 	CMPBGT	R4, $15, index17plus // len(sep) > 16
   1203 index2to16:                        // generic loop: sep fits in V0, one candidate position per compare
   1204 	CMPBGT	R7, R2, notfound   // cursor already past the last valid start
   1205 	MOVD	$1(R7), R8
   1206 	CMPBGT	R8, R2, index2to16tail // only one candidate position left
   1207 index2to16loop:
   1208 	// unrolled 2x
   1209 	VLL	R4, (R7), V1       // len(sep) bytes at the cursor
   1210 	VLL	R4, 1(R7), V2      // len(sep) bytes at cursor+1
   1211 	VCEQGS	V0, V1, V3
   1212 	BEQ	found              // match at R7
   1213 	MOVD	$1(R7), R7
   1214 	VCEQGS	V0, V2, V4
   1215 	BEQ	found              // match at the second position (R7 already advanced)
   1216 	MOVD	$1(R7), R7
   1217 	CMPBLT	R7, R2, index2to16loop // at least two positions left
   1218 	CMPBGT	R7, R2, notfound   // none left; otherwise exactly one (R7 == R2)
   1219 index2to16tail:
   1220 	VLL	R4, (R7), V1
   1221 	VCEQGS	V0, V1, V2
   1222 	BEQ	found
   1223 	BR	notfound
   1224 
   1225 index17plus:
   1226 	CMPBGT	R4, $31, index33plus // len(sep) > 32
   1227 	SUB	$16, R4, R0        // VLL index for the bytes of sep beyond the first 16
   1228 	VLL	R0, 16(R3), V1     // V1 = remainder of sep
   1229 	VONE	V7                 // all ones, to test that every compare matched
   1230 index17to32loop:
   1231 	VL	(R7), V2
   1232 	VLL	R0, 16(R7), V3
   1233 	VCEQG	V0, V2, V4
   1234 	VCEQG	V1, V3, V5
   1235 	VN	V4, V5, V6         // AND the per-register compare results
   1236 	VCEQGS	V6, V7, V8         // equal to all ones <=> both registers matched
   1237 	BEQ	found
   1238 	MOVD	$1(R7), R7         // next candidate position
   1239 	CMPBLE  R7, R2, index17to32loop
   1240 	BR	notfound
   1241 
   1242 index33plus:
   1243 	CMPBGT	R4, $47, index49plus // len(sep) > 48
   1244 	SUB	$32, R4, R0        // VLL index for the bytes of sep beyond the first 32
   1245 	VL	16(R3), V1         // V1 = sep[16:32]
   1246 	VLL	R0, 32(R3), V2     // V2 = remainder of sep
   1247 	VONE	V11                // all ones, to test that every compare matched
   1248 index33to48loop:
   1249 	VL	(R7), V3
   1250 	VL	16(R7), V4
   1251 	VLL	R0, 32(R7), V5
   1252 	VCEQG	V0, V3, V6
   1253 	VCEQG	V1, V4, V7
   1254 	VCEQG	V2, V5, V8
   1255 	VN	V6, V7, V9
   1256 	VN	V8, V9, V10        // AND of all three compare results
   1257 	VCEQGS	V10, V11, V12      // equal to all ones <=> all three registers matched
   1258 	BEQ	found
   1259 	MOVD	$1(R7), R7         // next candidate position
   1260 	CMPBLE  R7, R2, index33to48loop
   1261 	BR	notfound
   1262 
   1263 index49plus:
   1264 	CMPBGT	R4, $63, index65plus // len(sep) > 64
   1265 	SUB	$48, R4, R0        // VLL index for the bytes of sep beyond the first 48
   1266 	VL	16(R3), V1         // V1 = sep[16:32]
   1267 	VL	32(R3), V2         // V2 = sep[32:48]
   1268 	VLL	R0, 48(R3), V3     // V3 = remainder of sep
   1269 	VONE	V15                // all ones, to test that every compare matched
   1270 index49to64loop:
   1271 	VL	(R7), V4
   1272 	VL	16(R7), V5
   1273 	VL	32(R7), V6
   1274 	VLL	R0, 48(R7), V7
   1275 	VCEQG	V0, V4, V8
   1276 	VCEQG	V1, V5, V9
   1277 	VCEQG	V2, V6, V10
   1278 	VCEQG	V3, V7, V11
   1279 	VN	V8, V9, V12
   1280 	VN	V10, V11, V13
   1281 	VN	V12, V13, V14      // AND of all four compare results
   1282 	VCEQGS	V14, V15, V16      // equal to all ones <=> all four registers matched
   1283 	BEQ	found
   1284 	MOVD	$1(R7), R7         // next candidate position
   1285 	CMPBLE  R7, R2, index49to64loop // otherwise fall through to notfound
   1286 notfound:
   1287 	MOVD	$-1, (R5)
   1288 	RET
   1289 
   1290 index65plus:
   1291 	// not implemented
        	// NOTE(review): stores through R0, presumably a deliberate fault
        	// that relies on R0 == 0 -- confirm.
   1292 	MOVD	$0, (R0)
   1293 	RET
   1294 
   1295 foundV17: // index is in doubleword V17[0]
   1296 	VLGVG	$0, V17, R8
   1297 	ADD	R8, R7             // R7 = block start + index within block
   1298 found:                             // R7 = address of the match
   1299 	SUB	R1, R7             // convert to an offset from &s[0]
   1300 	MOVD	R7, (R5)
   1301 	RET
   1302 
   1302 // This is called from .init_array and follows the platform, not Go, ABI.
   1303 // We are overly conservative. We could only save the registers we use.
   1304 // However, since this function is only called once per loaded module
   1305 // performance is unimportant.
        // Input: R2 = the moduledata to append. The only other general register
        // the body writes is R1.
   1306 TEXT runtimeaddmoduledata(SB),NOSPLIT|NOFRAME,$0-0
   1307 	// Save R6-R15 in the register save area of the calling function.
   1308 	// Don't bother saving F8-F15 as we aren't doing any calls.
   1309 	STMG	R6, R15, 48(R15)
   1310 
   1311 	// append the argument (passed in R2, as per the ELF ABI) to the
   1312 	// moduledata linked list.
   1313 	MOVD	runtimelastmoduledatap(SB), R1	// R1 = current tail of the list
   1314 	MOVD	R2, moduledata_next(R1)		// tail.next = new module
   1315 	MOVD	R2, runtimelastmoduledatap(SB)	// the new module becomes the tail
   1316 
   1317 	// Restore R6-R15.
   1318 	LMG	48(R15), R6, R15
   1319 	RET
   1321 
   1321 TEXT checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally writes 1 to the 1-byte result and returns.
   1322 	MOVB	$1, ret+0(FP)
   1323 	RET
   1325