Home | History | Annotate | Download | only in runtime
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "go_asm.h"
      6 #include "go_tls.h"
      7 #include "funcdata.h"
      8 #include "textflag.h"
      9 
     10 // _rt0_s390x_lib is common startup code for s390x systems when
     11 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
     12 // arrange to invoke this function as a global constructor (for
     13 // c-archive) or when the shared library is loaded (for c-shared).
     14 // We expect argc and argv to be passed in the usual C ABI registers
     15 // R2 and R3.
     16 TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0
     17 	STMG	R6, R15, 48(R15)	// NOTE(review): the identical store is issued again below; one of the two looks redundant - confirm
     18 	MOVD	R2, _rt0_s390x_lib_argc<>(SB)	// stash argc for _rt0_s390x_lib_go
     19 	MOVD	R3, _rt0_s390x_lib_argv<>(SB)	// stash argv for _rt0_s390x_lib_go
     20 
     21 	// Save R6-R15 in the register save area of the calling function.
     22 	STMG	R6, R15, 48(R15)
     23 
     24 	// Allocate 80 bytes on the stack (0-15: newosproc arguments, 16-79: F8-F15).
     25 	MOVD	$-80(R15), R15
     26 
     27 	// Save F8-F15 in our stack frame.
     28 	FMOVD	F8, 16(R15)
     29 	FMOVD	F9, 24(R15)
     30 	FMOVD	F10, 32(R15)
     31 	FMOVD	F11, 40(R15)
     32 	FMOVD	F12, 48(R15)
     33 	FMOVD	F13, 56(R15)
     34 	FMOVD	F14, 64(R15)
     35 	FMOVD	F15, 72(R15)
     36 
     37 	// Synchronous initialization.
     38 	MOVD	$runtimelibpreinit(SB), R1
     39 	BL	R1	// indirect call through R1
     40 
     41 	// Create a new thread to finish Go runtime initialization.
     42 	MOVD	_cgo_sys_thread_create(SB), R1	// R1 = thread-create hook (loaded from memory)
     43 	CMP	R1, $0
     44 	BEQ	nocgo	// hook is zero: use newosproc instead
     45 	MOVD	$_rt0_s390x_lib_go(SB), R2	// R2 = function the new thread runs
     46 	MOVD	$0, R3	// R3 = 0
     47 	BL	R1
     48 	BR	restore
     49 
     50 nocgo:
     51 	MOVD	$0x800000, R1              // stacksize (0x800000 = 8MB)
     52 	MOVD	R1, 0(R15)
     53 	MOVD	$_rt0_s390x_lib_go(SB), R1
     54 	MOVD	R1, 8(R15)                 // fn
     55 	MOVD	$runtimenewosproc(SB), R1
     56 	BL	R1
     57 
     58 restore:
     59 	// Restore F8-F15 from our stack frame.
     60 	FMOVD	16(R15), F8
     61 	FMOVD	24(R15), F9
     62 	FMOVD	32(R15), F10
     63 	FMOVD	40(R15), F11
     64 	FMOVD	48(R15), F12
     65 	FMOVD	56(R15), F13
     66 	FMOVD	64(R15), F14
     67 	FMOVD	72(R15), F15
     68 	MOVD	$80(R15), R15	// release the 80-byte frame
     69 
     70 	// Restore R6-R15 from the save area written on entry.
     71 	LMG	48(R15), R6, R15
     72 	RET
     73 
     74 // _rt0_s390x_lib_go initializes the Go runtime.
     75 // This is started in a separate thread by _rt0_s390x_lib.
     76 TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0
     77 	MOVD	_rt0_s390x_lib_argc<>(SB), R2	// R2 = argc saved by _rt0_s390x_lib
     78 	MOVD	_rt0_s390x_lib_argv<>(SB), R3	// R3 = argv saved by _rt0_s390x_lib
     79 	MOVD	$runtimert0_go(SB), R1
     80 	BR	R1	// jump (not call): rt0_go takes over with argc/argv in R2/R3
     81 
     82 DATA _rt0_s390x_lib_argc<>(SB)/8, $0	// argc handed to the library constructor; written by _rt0_s390x_lib
     83 GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8
     84 DATA _rt0_s390x_lib_argv<>(SB)/8, $0	// argv handed to the library constructor; symbol name must match the GLOBL below
     85 GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8
     86 
     87 TEXT runtimert0_go(SB),NOSPLIT,$0
     88 	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
     89 	// C TLS base pointer in AR0:AR1
     90 
     91 	// initialize essential registers
     92 	XOR	R0, R0	// R0 = 0; later code in this file addresses memory relative to R0
     93 
     94 	SUB	$24, R15	// room for argc (8) and argv (16) stored below
     95 	MOVW	R2, 8(R15) // argc
     96 	MOVD	R3, 16(R15) // argv
     97 
     98 	// create istack out of the given (operating system) stack.
     99 	// _cgo_init may update stackguard.
    100 	MOVD	$runtimeg0(SB), g
    101 	MOVD	R15, R11
    102 	SUB	$(64*1024), R11	// R11 = SP - 64KB: provisional stack_lo and stack guards
    103 	MOVD	R11, g_stackguard0(g)
    104 	MOVD	R11, g_stackguard1(g)
    105 	MOVD	R11, (g_stack+stack_lo)(g)
    106 	MOVD	R15, (g_stack+stack_hi)(g)
    107 
    108 	// if there is a _cgo_init, call it using the gcc ABI.
    109 	MOVD	_cgo_init(SB), R11
    110 	CMPBEQ	R11, $0, nocgo
    111 	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
    112 	SLD	$32, R4, R4
    113 	MOVW	AR1, R4			// arg 2: TLS base pointer
    114 	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    115 	MOVD	g, R2			// arg 0: G
    116 	// C functions expect 160 bytes of space on caller stack frame
    117 	// and an 8-byte aligned stack pointer
    118 	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    119 	SUB	$160, R15		// reserve 160 bytes
    120 	MOVD    $~7, R6
    121 	AND 	R6, R15			// 8-byte align
    122 	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    123 	MOVD	R9, R15			// restore stack
    124 	XOR	R0, R0			// zero R0
    125 
    126 nocgo:
    127 	// update stackguard after _cgo_init
    128 	MOVD	(g_stack+stack_lo)(g), R2
    129 	ADD	$const__StackGuard, R2	// guard = stack_lo + _StackGuard
    130 	MOVD	R2, g_stackguard0(g)
    131 	MOVD	R2, g_stackguard1(g)
    132 
    133 	// set the per-goroutine and per-mach "registers"
    134 	MOVD	$runtimem0(SB), R2
    135 
    136 	// save m->g0 = g0
    137 	MOVD	g, m_g0(R2)
    138 	// save m0 to g0->m
    139 	MOVD	R2, g_m(g)
    140 
    141 	BL	runtimecheck(SB)
    142 
    143 	// argc/argv are already prepared on stack
    144 	BL	runtimeargs(SB)
    145 	BL	runtimeosinit(SB)
    146 	BL	runtimeschedinit(SB)
    147 
    148 	// create a new goroutine to start program
    149 	MOVD	$runtimemainPC(SB), R2		// entry
    150 	SUB     $24, R15	// 24-byte argument area for newproc
    151 	MOVD 	R2, 16(R15)	// 16(R15) = address of runtimemainPC
    152 	MOVD 	$0, 8(R15)	// 8(R15) = 0
    153 	MOVD 	$0, 0(R15)	// 0(R15) = 0
    154 	BL	runtimenewproc(SB)
    155 	ADD	$24, R15	// release the argument area
    156 
    157 	// start this M
    158 	BL	runtimemstart(SB)
    159 
    160 	MOVD	$0, 1(R0)	// reached only if mstart returns; presumably a deliberate faulting store at address 1 - confirm
    161 	RET
    162 
    163 DATA	runtimemainPC+0(SB)/8,$runtimemain(SB)	// 8-byte word holding the address of runtimemain
    164 GLOBL	runtimemainPC(SB),RODATA,$8	// read-only; its address is what rt0_go passes to newproc
    165 
    166 TEXT runtimebreakpoint(SB),NOSPLIT|NOFRAME,$0-0
    167 	MOVD	$0, 2(R0)	// store to address R0+2; presumably a deliberate fault (R0 is kept at 0) - confirm
    168 	RET
    169 
    170 TEXT runtimeasminit(SB),NOSPLIT|NOFRAME,$0-0
    171 	RET	// no work is done here: returns immediately
    172 
    173 /*
    174  *  go-routine
    175  */
    176 
    177 // void gosave(Gobuf*)
    178 // save state in Gobuf; setjmp
    179 TEXT runtimegosave(SB), NOSPLIT, $-8-8
    180 	MOVD	buf+0(FP), R3	// R3 = buf
    181 	MOVD	R15, gobuf_sp(R3)	// buf.sp = current SP
    182 	MOVD	LR, gobuf_pc(R3)	// buf.pc = our return address
    183 	MOVD	g, gobuf_g(R3)	// buf.g = current g
    184 	MOVD	$0, gobuf_lr(R3)
    185 	MOVD	$0, gobuf_ret(R3)
    186 	// Assert ctxt is zero. See func save.
    187 	MOVD	gobuf_ctxt(R3), R3	// R3 is reused: now buf.ctxt
    188 	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    189 	BL	runtimebadctxt(SB)
    190 	RET
    191 
    192 // void gogo(Gobuf*)
    193 // restore state from Gobuf; longjmp
    194 TEXT runtimegogo(SB), NOSPLIT, $16-8
    195 	MOVD	buf+0(FP), R5	// R5 = buf
    196 	MOVD	gobuf_g(R5), g	// make sure g is not nil
    197 	BL	runtimesave_g(SB)
    198 
    199 	MOVD	0(g), R4	// load through g; presumably the nil check promised above (R4 is not used afterwards)
    200 	MOVD	gobuf_sp(R5), R15	// switch to the saved stack pointer
    201 	MOVD	gobuf_lr(R5), LR
    202 	MOVD	gobuf_ret(R5), R3
    203 	MOVD	gobuf_ctxt(R5), R12
    204 	MOVD	$0, gobuf_sp(R5)	// clear the fields just consumed
    205 	MOVD	$0, gobuf_ret(R5)
    206 	MOVD	$0, gobuf_lr(R5)
    207 	MOVD	$0, gobuf_ctxt(R5)
    208 	CMP	R0, R0 // set condition codes for == test, needed by stack split
    209 	MOVD	gobuf_pc(R5), R6
    210 	BR	(R6)	// resume at buf.pc
    211 
    212 // void mcall(fn func(*g))
    213 // Switch to m->g0's stack, call fn(g).
    214 // Fn must never return.  It should gogo(&g->sched)
    215 // to keep running g.
    216 TEXT runtimemcall(SB), NOSPLIT, $-8-8
    217 	// Save caller state in g->sched
    218 	MOVD	R15, (g_sched+gobuf_sp)(g)
    219 	MOVD	LR, (g_sched+gobuf_pc)(g)
    220 	MOVD	$0, (g_sched+gobuf_lr)(g)
    221 	MOVD	g, (g_sched+gobuf_g)(g)
    222 
    223 	// Switch to m->g0 & its stack, call fn.
    224 	MOVD	g, R3	// R3 = calling g (becomes fn's argument)
    225 	MOVD	g_m(g), R8
    226 	MOVD	m_g0(R8), g
    227 	BL	runtimesave_g(SB)
    228 	CMP	g, R3
    229 	BNE	2(PC)	// caller was not g0: skip the badmcall jump
    230 	BR	runtimebadmcall(SB)
    231 	MOVD	fn+0(FP), R12			// context
    232 	MOVD	0(R12), R4			// code pointer
    233 	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
    234 	SUB	$16, R15	// 16-byte frame on g0's stack
    235 	MOVD	R3, 8(R15)	// 8(R15) = calling g
    236 	MOVD	$0, 0(R15)	// 0(R15) = 0
    237 	BL	(R4)
    238 	BR	runtimebadmcall2(SB)	// reached only if fn returned
    239 
    240 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    241 // of the G stack.  We need to distinguish the routine that
    242 // lives at the bottom of the G stack from the one that lives
    243 // at the top of the system stack because the one at the top of
    244 // the system stack terminates the stack walk (see topofstack()).
    245 TEXT runtimesystemstack_switch(SB), NOSPLIT, $0-0
    246 	UNDEF	// marker routine only; systemstack stores an address inside it as a fake PC
    247 	BL	(LR)	// make sure this function is not leaf
    248 	RET
    249 
    250 // func systemstack(fn func())
    251 TEXT runtimesystemstack(SB), NOSPLIT, $0-8
    252 	MOVD	fn+0(FP), R3	// R3 = fn
    253 	MOVD	R3, R12		// context
    254 	MOVD	g_m(g), R4	// R4 = m
    255 
    256 	MOVD	m_gsignal(R4), R5	// R5 = gsignal
    257 	CMPBEQ	g, R5, noswitch
    258 
    259 	MOVD	m_g0(R4), R5	// R5 = g0
    260 	CMPBEQ	g, R5, noswitch
    261 
    262 	MOVD	m_curg(R4), R6
    263 	CMPBEQ	g, R6, switch
    264 
    265 	// Bad: g is not gsignal, not g0, not curg. What is it?
    266 	// Hide call from linker nosplit analysis.
    267 	MOVD	$runtimebadsystemstack(SB), R3
    268 	BL	(R3)
    269 
    270 switch:
    271 	// save our state in g->sched.  Pretend to
    272 	// be systemstack_switch if the G stack is scanned.
    273 	MOVD	$runtimesystemstack_switch(SB), R6
    274 	ADD	$16, R6	// get past prologue (NOTE(review): 16 is a byte count tied to how systemstack_switch is assembled - confirm)
    275 	MOVD	R6, (g_sched+gobuf_pc)(g)
    276 	MOVD	R15, (g_sched+gobuf_sp)(g)
    277 	MOVD	$0, (g_sched+gobuf_lr)(g)
    278 	MOVD	g, (g_sched+gobuf_g)(g)
    279 
    280 	// switch to g0
    281 	MOVD	R5, g
    282 	BL	runtimesave_g(SB)
    283 	MOVD	(g_sched+gobuf_sp)(g), R3
    284 	// make it look like mstart called systemstack on g0, to stop traceback
    285 	SUB	$8, R3
    286 	MOVD	$runtimemstart(SB), R4
    287 	MOVD	R4, 0(R3)	// fake return address: start of mstart
    288 	MOVD	R3, R15
    289 
    290 	// call target function
    291 	MOVD	0(R12), R3	// code pointer
    292 	BL	(R3)
    293 
    294 	// switch back to g
    295 	MOVD	g_m(g), R3
    296 	MOVD	m_curg(R3), g
    297 	BL	runtimesave_g(SB)
    298 	MOVD	(g_sched+gobuf_sp)(g), R15	// back on the goroutine stack saved at switch:
    299 	MOVD	$0, (g_sched+gobuf_sp)(g)
    300 	RET
    301 
    302 noswitch:
    303 	// already on m stack, just call directly
    304 	// Using a tail call here cleans up tracebacks since we won't stop
    305 	// at an intermediate systemstack.
    306 	MOVD	0(R12), R3	// code pointer
    307 	MOVD	0(R15), LR	// restore LR
    308 	ADD	$8, R15	// drop the 8-byte slot LR was just reloaded from
    309 	BR	(R3)
    310 
    311 /*
    312  * support for morestack
    313  */
    314 
    315 // Called during function prolog when more stack is needed.
    316 // Caller has already loaded:
    317 // R3: framesize, R4: argsize, R5: LR
    318 //
    319 // The traceback routines see morestack on a g0 as being
    320 // the top of a stack (for example, morestack calling newstack
    321 // calling the scheduler calling newm calling gc), so we must
    322 // record an argument size. For that purpose, it has no arguments.
    323 TEXT runtimemorestack(SB),NOSPLIT|NOFRAME,$0-0
    324 	// Cannot grow scheduler stack (m->g0).
    325 	MOVD	g_m(g), R7	// R7 = m, used throughout
    326 	MOVD	m_g0(R7), R8
    327 	CMPBNE	g, R8, 3(PC)	// not on g0: skip the two calls below
    328 	BL	runtimebadmorestackg0(SB)
    329 	BL	runtimeabort(SB)
    330 
    331 	// Cannot grow signal stack (m->gsignal).
    332 	MOVD	m_gsignal(R7), R8
    333 	CMP	g, R8
    334 	BNE	3(PC)	// not on gsignal: skip the two calls below
    335 	BL	runtimebadmorestackgsignal(SB)
    336 	BL	runtimeabort(SB)
    337 
    338 	// Called from f.
    339 	// Set g->sched to context in f.
    340 	MOVD	R15, (g_sched+gobuf_sp)(g)
    341 	MOVD	LR, R8
    342 	MOVD	R8, (g_sched+gobuf_pc)(g)	// sched.pc = LR on entry (return address into f)
    343 	MOVD	R5, (g_sched+gobuf_lr)(g)	// sched.lr = R5, the LR value the caller preloaded
    344 	MOVD	R12, (g_sched+gobuf_ctxt)(g)	// sched.ctxt = R12 (zeroed by morestack_noctxt)
    345 
    346 	// Called from f.
    347 	// Set m->morebuf to f's caller.
    348 	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
    349 	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
    350 	MOVD	g, (m_morebuf+gobuf_g)(R7)
    351 
    352 	// Call newstack on m->g0's stack.
    353 	MOVD	m_g0(R7), g
    354 	BL	runtimesave_g(SB)
    355 	MOVD	(g_sched+gobuf_sp)(g), R15
    356 	// Create a stack frame on g0 to call newstack.
    357 	MOVD	$0, -8(R15)	// Zero saved LR in frame
    358 	SUB	$8, R15
    359 	BL	runtimenewstack(SB)
    360 
    361 	// Not reached, but make sure the return PC from the call to newstack
    362 	// is still in this function, and not the beginning of the next.
    363 	UNDEF
    364 
    365 TEXT runtimemorestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
    366 	MOVD	$0, R12	// R12 = 0; morestack stores R12 into g->sched.ctxt
    367 	BR	runtimemorestack(SB)	// jump, not call: LR and R3-R5 reach morestack unchanged
    368 
    369 // reflectcall: call a function with the given argument list
    370 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    371 // we don't have variable-sized frames, so we use a small number
    372 // of constant-sized-frame functions to encode a few bits of size in the pc.
    373 // Caution: ugly multiline assembly macros in your future!
    374 
    375 #define DISPATCH(NAME,MAXSIZE)		\
    376 	MOVD	$MAXSIZE, R4;	/* R4 = size limit of this stub */	\
    377 	CMP	R3, R4;		/* R3 = argsize, loaded by the user of the macro */	\
    378 	BGT	3(PC);		/* too big: skip the two-instruction jump below */	\
    379 	MOVD	$NAME(SB), R5;	\
    380 	BR	(R5)
    381 // Note: can't just "BR NAME(SB)" - bad inlining results.
    382 
    383 TEXT reflectcall(SB), NOSPLIT, $0-0	// NOTE(review): same symbol name as the TEXT that follows; a package qualifier appears to be missing from one of them - confirm
    384 	BR	reflectcall(SB)	// forwards to the size-dispatching reflectcall (as written here the target is this symbol's own name)
    385 
    386 TEXT reflectcall(SB), NOSPLIT, $-8-32
    387 	MOVWZ argsize+24(FP), R3	// R3 = argsize, zero-extended from 32 bits; compared by every DISPATCH below
    388 	DISPATCH(runtimecall32, 32)	// each DISPATCH jumps to its stub when argsize <= the size, else falls through
    389 	DISPATCH(runtimecall64, 64)
    390 	DISPATCH(runtimecall128, 128)
    391 	DISPATCH(runtimecall256, 256)
    392 	DISPATCH(runtimecall512, 512)
    393 	DISPATCH(runtimecall1024, 1024)
    394 	DISPATCH(runtimecall2048, 2048)
    395 	DISPATCH(runtimecall4096, 4096)
    396 	DISPATCH(runtimecall8192, 8192)
    397 	DISPATCH(runtimecall16384, 16384)
    398 	DISPATCH(runtimecall32768, 32768)
    399 	DISPATCH(runtimecall65536, 65536)
    400 	DISPATCH(runtimecall131072, 131072)
    401 	DISPATCH(runtimecall262144, 262144)
    402 	DISPATCH(runtimecall524288, 524288)
    403 	DISPATCH(runtimecall1048576, 1048576)
    404 	DISPATCH(runtimecall2097152, 2097152)
    405 	DISPATCH(runtimecall4194304, 4194304)
    406 	DISPATCH(runtimecall8388608, 8388608)
    407 	DISPATCH(runtimecall16777216, 16777216)
    408 	DISPATCH(runtimecall33554432, 33554432)
    409 	DISPATCH(runtimecall67108864, 67108864)
    410 	DISPATCH(runtimecall134217728, 134217728)
    411 	DISPATCH(runtimecall268435456, 268435456)
    412 	DISPATCH(runtimecall536870912, 536870912)
    413 	DISPATCH(runtimecall1073741824, 1073741824)
    414 	MOVD	$runtimebadreflectcall(SB), R5	// argsize is larger than every size class above
    415 	BR	(R5)
    416 
    417 #define CALLFN(NAME,MAXSIZE)			\
    418 TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
    419 	NO_LOCAL_POINTERS;			\
    420 	/* copy arguments to stack */		\
    421 	MOVD	arg+16(FP), R4;		/* R4 = source cursor */	\
    422 	MOVWZ	argsize+24(FP), R5;	/* R5 = bytes still to copy */	\
    423 	MOVD	$stack-MAXSIZE(SP), R6;	/* R6 = destination cursor in this frame */	\
    424 loopArgs: /* copy 256 bytes at a time */	\
    425 	CMP	R5, $256;			\
    426 	BLT	tailArgs;			\
    427 	SUB	$256, R5;			\
    428 	MVC	$256, 0(R4), 0(R6);		\
    429 	MOVD	$256(R4), R4;			\
    430 	MOVD	$256(R6), R6;			\
    431 	BR	loopArgs;			\
    432 tailArgs: /* copy remaining bytes */		\
    433 	CMP	R5, $0;				\
    434 	BEQ	callFunction;			\
    435 	SUB	$1, R5;			/* R5 = remaining - 1, handed to EXRL */	\
    436 	EXRL	$callfnMVC<>(SB), R5;	/* executes the MVC template at callfnMVC with R5 */	\
    437 callFunction:					\
    438 	MOVD	f+8(FP), R12;		/* R12 = f (context) */	\
    439 	MOVD	(R12), R8;		/* R8 = code pointer stored at f */	\
    440 	PCDATA  $PCDATA_StackMapIndex, $0;	\
    441 	BL	(R8);				\
    442 	/* copy return values back */		\
    443 	MOVD	argtype+0(FP), R7;		\
    444 	MOVD	arg+16(FP), R6;			\
    445 	MOVWZ	n+24(FP), R5;			\
    446 	MOVD	$stack-MAXSIZE(SP), R4;		\
    447 	MOVWZ	retoffset+28(FP), R1;		\
    448 	ADD	R1, R4;			/* R4 = frame copy + retoffset */	\
    449 	ADD	R1, R6;			/* R6 = arg + retoffset */	\
    450 	SUB	R1, R5;			/* R5 = n - retoffset */	\
    451 	BL	callRet<>(SB);		/* takes R7, R6, R4, R5 in registers */	\
    452 	RET
    453 
    454 // callRet copies return values back at the end of call*. This is a
    455 // separate function so it can allocate stack space for the arguments
    456 // to reflectcallmove. It does not follow the Go ABI; it expects its
    457 // arguments in registers.
    458 TEXT callRet<>(SB), NOSPLIT, $32-0
    459 	MOVD	R7, 8(R15)	// R7: argtype, as loaded by CALLFN
    460 	MOVD	R6, 16(R15)	// R6: caller's arg buffer + retoffset
    461 	MOVD	R4, 24(R15)	// R4: CALLFN's stack copy + retoffset
    462 	MOVD	R5, 32(R15)	// R5: n - retoffset
    463 	BL	runtimereflectcallmove(SB)
    464 	RET
    465 
    466 CALLFN(call32, 32)	// one fixed-frame stub per size class, doubling from 32 bytes up to 1073741824
    467 CALLFN(call64, 64)
    468 CALLFN(call128, 128)
    469 CALLFN(call256, 256)
    470 CALLFN(call512, 512)
    471 CALLFN(call1024, 1024)
    472 CALLFN(call2048, 2048)
    473 CALLFN(call4096, 4096)
    474 CALLFN(call8192, 8192)
    475 CALLFN(call16384, 16384)
    476 CALLFN(call32768, 32768)
    477 CALLFN(call65536, 65536)
    478 CALLFN(call131072, 131072)
    479 CALLFN(call262144, 262144)
    480 CALLFN(call524288, 524288)
    481 CALLFN(call1048576, 1048576)
    482 CALLFN(call2097152, 2097152)
    483 CALLFN(call4194304, 4194304)
    484 CALLFN(call8388608, 8388608)
    485 CALLFN(call16777216, 16777216)
    486 CALLFN(call33554432, 33554432)
    487 CALLFN(call67108864, 67108864)
    488 CALLFN(call134217728, 134217728)
    489 CALLFN(call268435456, 268435456)
    490 CALLFN(call536870912, 536870912)
    491 CALLFN(call1073741824, 1073741824)	// largest class; reflectcall sends anything bigger to badreflectcall
    492 
    493 // Not a function: target for EXRL (execute relative long) instruction.
    494 TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
    495 	MVC	$1, 0(R4), 0(R6)	// template only: no RET follows; CALLFN runs it via EXRL with R5 (presumably supplying the real length - confirm)
    496 
    497 TEXT runtimeprocyield(SB),NOSPLIT,$0-0
    498 	RET	// no spin or pause instruction is issued: returns immediately
    499 
    500 // void jmpdefer(fv, sp);
    501 // called from deferreturn.
    502 // 1. grab stored LR for caller
    503 // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
    504 // 3. BR to fn
    505 TEXT runtimejmpdefer(SB),NOSPLIT|NOFRAME,$0-16
    506 	MOVD	0(R15), R1	// step 1: stored LR at the top of the stack
    507 	SUB	$6, R1, LR	// step 2: LR = R1 - 6, so returning re-executes the BL deferreturn
    508 
    509 	MOVD	fv+0(FP), R12	// R12 = fv (context)
    510 	MOVD	argp+8(FP), R15
    511 	SUB	$8, R15	// SP = argp - 8
    512 	MOVD	0(R12), R3	// R3 = code pointer stored at fv
    513 	BR	(R3)	// step 3: jump to fn
    514 
    515 // Save state of caller into g->sched. Smashes R1.
    516 TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
    517 	MOVD	LR, (g_sched+gobuf_pc)(g)	// sched.pc = our return address
    518 	MOVD	R15, (g_sched+gobuf_sp)(g)	// sched.sp = current SP
    519 	MOVD	$0, (g_sched+gobuf_lr)(g)
    520 	MOVD	$0, (g_sched+gobuf_ret)(g)
    521 	// Assert ctxt is zero. See func save.
    522 	MOVD	(g_sched+gobuf_ctxt)(g), R1	// this load is the "smashes R1" in the header
    523 	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    524 	BL	runtimebadctxt(SB)
    525 	RET
    526 
    527 // func asmcgocall(fn, arg unsafe.Pointer) int32
    528 // Call fn(arg) on the scheduler stack,
    529 // aligned appropriately for the gcc ABI.
    530 // See cgocall.go for more details.
    531 TEXT asmcgocall(SB),NOSPLIT,$0-20
    532 	// R3 = fn; R4 = arg; R2 = original SP; R5 = old g; R6 = temp; R13 = g; R15 = stack pointer
    533 	// C TLS base pointer in AR0:AR1
    534 	MOVD	fn+0(FP), R3
    535 	MOVD	arg+8(FP), R4
    536 
    537 	MOVD	R15, R2		// save original stack pointer
    538 	MOVD	g, R5
    539 
    540 	// Figure out if we need to switch to m->g0 stack.
    541 	// We get called to create new OS threads too, and those
    542 	// come in on the m->g0 stack already.
    543 	MOVD	g_m(g), R6
    544 	MOVD	m_g0(R6), R6
    545 	CMPBEQ	R6, g, g0
    546 	BL	gosave<>(SB)
    547 	MOVD	R6, g
    548 	BL	runtimesave_g(SB)
    549 	MOVD	(g_sched+gobuf_sp)(g), R15
    550 
    551 	// Now on a scheduling stack (a pthread-created stack).
    552 g0:
    553 	// Save room for two of our pointers, plus 160 bytes of callee
    554 	// save area that lives on the caller stack.
    555 	SUB	$176, R15
    556 	MOVD	$~7, R6
    557 	AND	R6, R15                 // 8-byte alignment for gcc ABI
    558 	MOVD	R5, 168(R15)             // save old g on stack
    559 	MOVD	(g_stack+stack_hi)(R5), R5
    560 	SUB	R2, R5	// R5 = old g's stack.hi - original SP
    561 	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
    562 	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
    563 	MOVD	R4, R2                  // arg in R2
    564 	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
    565 
    566 	XOR	R0, R0                  // set R0 back to 0.
    567 	// Restore g, stack pointer.
    568 	MOVD	168(R15), g
    569 	BL	runtimesave_g(SB)
    570 	MOVD	(g_stack+stack_hi)(g), R5
    571 	MOVD	160(R15), R6
    572 	SUB	R6, R5	// SP = current stack.hi - saved depth
    573 	MOVD	R5, R15
    574 
    575 	MOVW	R2, ret+16(FP)	// low 32 bits of R2 after the call become the int32 result
    576 	RET
    577 
    578 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
    579 // Turn the fn into a Go func (by taking its address) and call
    580 // cgocallback_gofunc.
    581 TEXT runtimecgocallback(SB),NOSPLIT,$32-32
    582 	MOVD	$fn+0(FP), R3	// address of the fn argument slot (not its value): this is the "Go func"
    583 	MOVD	R3, 8(R15)
    584 	MOVD	frame+8(FP), R3
    585 	MOVD	R3, 16(R15)
    586 	MOVD	framesize+16(FP), R3
    587 	MOVD	R3, 24(R15)
    588 	MOVD	ctxt+24(FP), R3
    589 	MOVD	R3, 32(R15)
    590 	MOVD	$runtimecgocallback_gofunc(SB), R3
    591 	BL	(R3)	// indirect call with the four arguments at 8..32(R15)
    592 	RET
    593 
    594 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
    595 // See cgocall.go for more details.
    596 TEXT cgocallback_gofunc(SB),NOSPLIT,$16-32
    597 	NO_LOCAL_POINTERS
    598 
    599 	// Load m and g from thread-local storage.
    600 	MOVB	runtimeiscgo(SB), R3
    601 	CMPBEQ	R3, $0, nocgo
    602 	BL	runtimeload_g(SB)
    603 
    604 nocgo:
    605 	// If g is nil, Go did not create the current thread.
    606 	// Call needm to obtain one for temporary use.
    607 	// In this case, we're running on the thread stack, so there's
    608 	// lots of space, but the linker doesn't know. Hide the call from
    609 	// the linker analysis by using an indirect call.
    610 	CMPBEQ	g, $0, needm
    611 
    612 	MOVD	g_m(g), R8
    613 	MOVD	R8, savedm-8(SP)	// remember the m we entered with (non-zero)
    614 	BR	havem
    615 
    616 needm:
    617 	MOVD	g, savedm-8(SP) // g is zero, so is m.
    618 	MOVD	$runtimeneedm(SB), R3
    619 	BL	(R3)
    620 
    621 	// Set m->sched.sp = SP, so that if a panic happens
    622 	// during the function we are about to execute, it will
    623 	// have a valid SP to run on the g0 stack.
    624 	// The next few lines (after the havem label)
    625 	// will save this SP onto the stack and then write
    626 	// the same SP back to m->sched.sp. That seems redundant,
    627 	// but if an unrecovered panic happens, unwindm will
    628 	// restore the g->sched.sp from the stack location
    629 	// and then systemstack will try to use it. If we don't set it here,
    630 	// that restored SP will be uninitialized (typically 0) and
    631 	// will not be usable.
    632 	MOVD	g_m(g), R8
    633 	MOVD	m_g0(R8), R3
    634 	MOVD	R15, (g_sched+gobuf_sp)(R3)
    635 
    636 havem:
    637 	// Now there's a valid m, and we're running on its m->g0.
    638 	// Save current m->g0->sched.sp on stack and then set it to SP.
    639 	// Save current sp in m->g0->sched.sp in preparation for
    640 	// switch back to m->curg stack.
    641 	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP). NOTE(review): this code addresses the frame via R15, so "R1" may be a leftover - confirm.
    642 	MOVD	m_g0(R8), R3
    643 	MOVD	(g_sched+gobuf_sp)(R3), R4
    644 	MOVD	R4, savedsp-16(SP)
    645 	MOVD	R15, (g_sched+gobuf_sp)(R3)
    646 
    647 	// Switch to m->curg stack and call runtime.cgocallbackg.
    648 	// Because we are taking over the execution of m->curg
    649 	// but *not* resuming what had been running, we need to
    650 	// save that information (m->curg->sched) so we can restore it.
    651 	// We can restore m->curg->sched.sp easily, because calling
    652 	// runtime.cgocallbackg leaves SP unchanged upon return.
    653 	// To save m->curg->sched.pc, we push it onto the stack.
    654 	// This has the added benefit that it looks to the traceback
    655 	// routine like cgocallbackg is going to return to that
    656 	// PC (because the frame we allocate below has the same
    657 	// size as cgocallback_gofunc's frame declared above)
    658 	// so that the traceback will seamlessly trace back into
    659 	// the earlier calls.
    660 	//
    661 	// In the new goroutine, -8(SP) is unused (where SP refers to
    662 	// m->curg's SP while we're setting it up, before we've adjusted it).
    663 	MOVD	m_curg(R8), g
    664 	BL	runtimesave_g(SB)
    665 	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
    666 	MOVD	(g_sched+gobuf_pc)(g), R5
    667 	MOVD	R5, -24(R4)	// saved curg sched.pc goes at the bottom of the new frame
    668 	MOVD	ctxt+24(FP), R5
    669 	MOVD	R5, -16(R4)	// ctxt argument for cgocallbackg
    670 	MOVD	$-24(R4), R15	// SP = curg's sched.sp - 24
    671 	BL	runtimecgocallbackg(SB)
    672 
    673 	// Restore g->sched (== m->curg->sched) from saved values.
    674 	MOVD	0(R15), R5
    675 	MOVD	R5, (g_sched+gobuf_pc)(g)
    676 	MOVD	$24(R15), R4
    677 	MOVD	R4, (g_sched+gobuf_sp)(g)
    678 
    679 	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    680 	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    681 	// so we do not have to restore it.)
    682 	MOVD	g_m(g), R8
    683 	MOVD	m_g0(R8), g
    684 	BL	runtimesave_g(SB)
    685 	MOVD	(g_sched+gobuf_sp)(g), R15
    686 	MOVD	savedsp-16(SP), R4
    687 	MOVD	R4, (g_sched+gobuf_sp)(g)
    688 
    689 	// If the m on entry was nil, we called needm above to borrow an m
    690 	// for the duration of the call. Since the call is over, return it with dropm.
    691 	MOVD	savedm-8(SP), R6
    692 	CMPBNE	R6, $0, droppedm	// entered with an m of our own: nothing to drop
    693 	MOVD	$runtimedropm(SB), R3
    694 	BL	(R3)
    695 droppedm:
    696 
    697 	// Done!
    698 	RET
    699 
    700 // void setg(G*); set g. for use by needm.
    701 TEXT runtimesetg(SB), NOSPLIT, $0-8
    702 	MOVD	gg+0(FP), g	// g register = argument
    703 	// This only happens if iscgo, so jump straight to save_g
    704 	BL	runtimesave_g(SB)
    705 	RET
    706 
    707 // void setg_gcc(G*); set g in C TLS.
    708 // Must obey the gcc calling convention.
    709 TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
    710 	// The standard prologue clobbers LR (R14), which is callee-save in
    711 	// the C ABI, so we have to use NOFRAME and save LR ourselves.
    712 	MOVD	LR, R1
    713 	// Also save g, R10, and R11 since they're callee-save in C ABI
    714 	MOVD	R10, R3
    715 	MOVD	g, R4
    716 	MOVD	R11, R5
    717 
    718 	MOVD	R2, g	// R2 carries the G* argument
    719 	BL	runtimesave_g(SB)
    720 
    721 	MOVD	R5, R11	// restore the four saved registers from R1/R3/R4/R5
    722 	MOVD	R4, g
    723 	MOVD	R3, R10
    724 	MOVD	R1, LR
    725 	RET
    726 
    727 TEXT runtimegetcallerpc(SB),NOSPLIT|NOFRAME,$0-8
    728 	MOVD	0(R15), R3		// LR saved by caller
    729 	MOVD	R3, ret+0(FP)	// return it unchanged
    730 	RET
    731 
    732 TEXT runtimeabort(SB),NOSPLIT|NOFRAME,$0-0
    733 	MOVW	(R0), R0	// load from the address in R0; presumably a deliberate fault (R0 is kept at 0 elsewhere in this file) - confirm
    734 	UNDEF	// no RET: execution must not continue past this point
    735 
    736 // int64 runtimecputicks(void)
    737 TEXT runtimecputicks(SB),NOSPLIT,$0-8
    738 	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
    739 	// This means that since about 1972 the msb has been set, making the
    740 	// result of a call to STORE CLOCK (stck) a negative number.
    741 	// We clear the msb to make it positive.
    742 	STCK	ret+0(FP)      // serialises before and after call
    743 	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
    744 	SLD	$1, R3	// shift the msb out ...
    745 	SRD	$1, R3	// ... and shift back: bit 63 is now 0
    746 	MOVD	R3, ret+0(FP)
    747 	RET
    748 
    749 // AES hashing not implemented for s390x
    750 TEXT runtimeaeshash(SB),NOSPLIT|NOFRAME,$0-0
    751 	MOVW	(R0), R15	// stub: load from the address in R0, presumably to crash if ever called - confirm
    752 TEXT runtimeaeshash32(SB),NOSPLIT|NOFRAME,$0-0
    753 	MOVW	(R0), R15	// same stub as aeshash
    754 TEXT runtimeaeshash64(SB),NOSPLIT|NOFRAME,$0-0
    755 	MOVW	(R0), R15	// same stub as aeshash
    756 TEXT runtimeaeshashstr(SB),NOSPLIT|NOFRAME,$0-0
    757 	MOVW	(R0), R15	// same stub as aeshash
    758 
    759 // memequal(a, b unsafe.Pointer, size uintptr) bool
    760 TEXT runtimememequal(SB),NOSPLIT|NOFRAME,$0-25
    761 	MOVD	a+0(FP), R3	// R3 = a
    762 	MOVD	b+8(FP), R5	// R5 = b
    763 	MOVD	size+16(FP), R6	// R6 = len
    764 	LA	ret+24(FP), R7	// R7 = address of the bool result
    765 	BR	runtimememeqbody(SB)	// jump: memeqbody stores the result through R7 and returns to our caller
    766 
    767 // memequal_varlen(a, b unsafe.Pointer) bool
    768 TEXT runtimememequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    769 	MOVD	a+0(FP), R3	// R3 = a
    770 	MOVD	b+8(FP), R5	// R5 = b
    771 	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
    772 	LA	ret+16(FP), R7	// R7 = address of the bool result
    773 	BR	runtimememeqbody(SB)	// jump: memeqbody stores the result through R7
    774 
    775 TEXT bytesEqual(SB),NOSPLIT|NOFRAME,$0-49
    776 	MOVD	a_len+8(FP), R2
    777 	MOVD	b_len+32(FP), R6	// R6 doubles as the length passed to memeqbody
    778 	MOVD	a+0(FP), R3
    779 	MOVD	b+24(FP), R5
    780 	LA	ret+48(FP), R7	// R7 = address of the bool result
    781 	CMPBNE	R2, R6, notequal	// different lengths: unequal without comparing bytes
    782 	BR	runtimememeqbody(SB)
    783 notequal:
    784 	MOVB	$0, ret+48(FP)
    785 	RET
    786 
    787 // input:
    788 //   R3 = a
    789 //   R5 = b
    790 //   R6 = len
    791 //   R7 = address of output byte (stores 0 or 1 here)
    792 //   a and b have the same length
    793 TEXT runtimememeqbody(SB),NOSPLIT|NOFRAME,$0-0
    794 	CMPBEQ	R3, R5, equal	// same address: equal without reading memory
    795 loop:
    796 	CMPBEQ	R6, $0, equal	// nothing left to compare
    797 	CMPBLT	R6, $32, tiny	// fewer than 32 bytes: register compares
    798 	CMP	R6, $256
    799 	BLT	tail	// 32..255 bytes: one EXRL'd CLC
    800 	CLC	$256, 0(R3), 0(R5)	// compare a full 256-byte chunk
    801 	BNE	notequal
    802 	SUB	$256, R6
    803 	LA	256(R3), R3
    804 	LA	256(R5), R5
    805 	BR	loop
    806 tail:
    807 	SUB	$1, R6, R8	// R8 = len - 1, handed to EXRL
    808 	EXRL	$runtimememeqbodyclc(SB), R8	// executes the CLC template at memeqbodyclc with R8
    809 	BEQ	equal
    810 notequal:
    811 	MOVB	$0, 0(R7)
    812 	RET
    813 equal:
    814 	MOVB	$1, 0(R7)
    815 	RET
    816 tiny:
    817 	MOVD	$0, R2	// R2 = offset of the next uncompared byte
    818 	CMPBLT	R6, $16, lt16
    819 	MOVD	0(R3), R8
    820 	MOVD	0(R5), R9
    821 	CMPBNE	R8, R9, notequal
    822 	MOVD	8(R3), R8
    823 	MOVD	8(R5), R9
    824 	CMPBNE	R8, R9, notequal
    825 	LA	16(R2), R2
    826 	SUB	$16, R6
    827 lt16:
    828 	CMPBLT	R6, $8, lt8
    829 	MOVD	0(R3)(R2*1), R8
    830 	MOVD	0(R5)(R2*1), R9
    831 	CMPBNE	R8, R9, notequal
    832 	LA	8(R2), R2
    833 	SUB	$8, R6
    834 lt8:
    835 	CMPBLT	R6, $4, lt4
    836 	MOVWZ	0(R3)(R2*1), R8
    837 	MOVWZ	0(R5)(R2*1), R9
    838 	CMPBNE	R8, R9, notequal
    839 	LA	4(R2), R2
    840 	SUB	$4, R6
    841 lt4:
    842 #define CHECK(n) \
    843 	CMPBEQ	R6, $n, equal \
    844 	MOVB	n(R3)(R2*1), R8 \
    845 	MOVB	n(R5)(R2*1), R9 \
    846 	CMPBNE	R8, R9, notequal
    847 	CHECK(0)	// each CHECK(n): equal if exactly n bytes were left, else compare byte n
    848 	CHECK(1)
    849 	CHECK(2)
    850 	CHECK(3)
    851 	BR	equal
    852 
    853 TEXT runtimememeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
    854 	CLC	$1, 0(R3), 0(R5)	// template instruction: memeqbody runs it through EXRL with len-1 in R8
    855 	RET
    856 
    857 TEXT bytesIndexByte(SB),NOSPLIT|NOFRAME,$0-40
    858 	MOVD	s+0(FP), R3     // s => R3
    859 	MOVD	s_len+8(FP), R4 // s_len => R4
    860 	MOVBZ	c+24(FP), R5    // c => R5
    861 	MOVD	$ret+32(FP), R2 // &ret => R2
    862 	BR	runtimeindexbytebody(SB)	// jump: indexbytebody writes the index through R2
    863 
    864 TEXT stringsIndexByte(SB),NOSPLIT|NOFRAME,$0-32
    865 	MOVD	s+0(FP), R3     // s => R3
    866 	MOVD	s_len+8(FP), R4 // s_len => R4
    867 	MOVBZ	c+16(FP), R5    // c => R5
    868 	MOVD	$ret+24(FP), R2 // &ret => R2
    869 	BR	runtimeindexbytebody(SB)	// jump: indexbytebody writes the index through R2
    870 
    871 // input:
    872 // R3: s
    873 // R4: s_len
    874 // R5: c -- byte sought
    875 // R2: &ret -- address to put index into
    876 TEXT runtimeindexbytebody(SB),NOSPLIT|NOFRAME,$0
    877 	CMPBEQ	R4, $0, notfound	// empty input: result is -1
    878 	MOVD	R3, R6          // store base for later
    879 	ADD	R3, R4, R8      // the address after the end of the string
    880 	//if the length is small, use loop; otherwise, use vector or srst search
    881 	CMPBGE	R4, $16, large
    882 
    883 residual:
    884 	CMPBEQ	R3, R8, notfound	// reached the end without a match
    885 	MOVBZ	0(R3), R7
    886 	LA	1(R3), R3	// advance before comparing, so R3 ends one past a match
    887 	CMPBNE	R7, R5, residual
    888 
    889 found:
    890 	SUB	R6, R3
    891 	SUB	$1, R3	// index = (R3 - base) - 1, undoing the post-increment
    892 	MOVD	R3, 0(R2)
    893 	RET
    894 
    895 notfound:
    896 	MOVD	$-1, 0(R2)
    897 	RET
    898 
    899 large:
    900 	MOVBZ	cpu+facilities_hasVX(SB), R1	// non-zero selects the vector implementation
    901 	CMPBNE	R1, $0, vectorimpl
    902 
    903 srstimpl:                       // no vector facility
    904 	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
    905 srstloop:
    906 	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
    907 	BVS	srstloop        // interrupted - continue
    908 	BGT	notfoundr0
    909 foundr0:
    910 	XOR	R0, R0          // reset R0
    911 	SUB	R6, R8          // remove base
    912 	MOVD	R8, 0(R2)
    913 	RET
    914 notfoundr0:
    915 	XOR	R0, R0          // reset R0
    916 	MOVD	$-1, 0(R2)
    917 	RET
    918 
    919 vectorimpl:
    920 	//if the address is not 16byte aligned, use loop for the header
    921 	MOVD	R3, R8
    922 	AND	$15, R8
    923 	CMPBGT	R8, $0, notaligned
    924 
    925 aligned:
    926 	ADD	R6, R4, R8	// R8 = base + len: end address, also used by residual
    927 	MOVD	R8, R7
    928 	AND	$-16, R7	// R7 = end address rounded down to a multiple of 16
    929 	// replicate c across V17
    930 	VLVGB	$0, R5, V19
    931 	VREPB	$0, V19, V17
    932 
    933 vectorloop:
    934 	CMPBGE	R3, R7, residual	// no full 16-byte chunk left: finish bytewise
    935 	VL	0(R3), V16    // load string to be searched into V16
    936 	ADD	$16, R3
    937 	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
    938 	BVS	vectorloop
    939 
    940 	// when vector search found c in the string
    941 	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
    942 	SUB	$16, R3	// back to the start of the chunk just searched
    943 	SUB	R6, R3	// R3 = chunk offset from base
    944 	ADD	R3, R7	// index = chunk offset + position within the chunk
    945 	MOVD	R7, 0(R2)
    946 	RET
    947 
    948 notaligned:
    949 	MOVD	R3, R8
    950 	AND	$-16, R8
    951 	ADD     $16, R8	// R8 = next 16-byte boundary above R3
    952 notalignedloop:
    953 	CMPBEQ	R3, R8, aligned	// boundary reached: switch to the vector loop
    954 	MOVBZ	0(R3), R7
    955 	LA	1(R3), R3
    956 	CMPBNE	R7, R5, notalignedloop
    957 	BR	found
    958 
    959 TEXT runtimereturn0(SB), NOSPLIT, $0
        	// Writes zero into R3 and returns; no frame is used.
        	// NOTE(review): presumably R3 is where the code that resumes here
        	// looks for a result -- confirm against the callers.
    960 	MOVW	$0, R3
    961 	RET
    962 
    963 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
    964 // Must obey the gcc calling convention.
        // The result is left in R2. R1, R3, R4 and R5 hold the saved register
        // values across the call to load_g.
    965 TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
    966 	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
    967 	MOVD	g, R1
    968 	MOVD	R10, R3
    969 	MOVD	LR, R4
    970 	MOVD	R11, R5
    971 
        	// NOTE(review): this relies on load_g leaving R1, R3, R4 and R5
        	// untouched -- confirm against its definition.
    972 	BL	runtimeload_g(SB)	// clobbers g (R13), R10, R11
    973 	MOVD	g_m(g), R2                  // R2 = g->m
    974 	MOVD	m_curg(R2), R2              // R2 = g->m->curg
    975 	MOVD	(g_stack+stack_hi)(R2), R2  // R2 = g->m->curg.stack.hi
    976 
        	// Put back the values saved on entry.
    977 	MOVD	R1, g
    978 	MOVD	R3, R10
    979 	MOVD	R4, LR
    980 	MOVD	R5, R11
    981 	RET
    982 
    983 // The top-most function running on a goroutine
    984 // returns to goexit+PCQuantum.
        // The body is a nop, a call to goexit1, and another nop. There is no
        // RET because the call is not expected to come back.
    985 TEXT runtimegoexit(SB),NOSPLIT|NOFRAME,$0-0
        	// NOTE(review): presumably this nop fills the PCQuantum bytes in front
        	// of the BL, so that goexit+PCQuantum is the BL itself -- confirm.
    986 	BYTE $0x07; BYTE $0x00; // 2-byte nop
    987 	BL	runtimegoexit1(SB)	// does not return
    988 	// traceback from goexit1 must hit code range of goexit
        	// (the nop below keeps the address following the BL inside this function)
    989 	BYTE $0x07; BYTE $0x00; // 2-byte nop
    990 
    991 TEXT runtimesigreturn(SB),NOSPLIT,$0-0
        	// Empty body: returns to the caller immediately.
    992 	RET
    993 
    994 TEXT publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
    995         // Stores are already ordered on s390x, so no barrier instruction
    996         // is issued here; the call only acts as a compiler barrier.
    997 	RET
    998 
    999 TEXT runtimecmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Place both operands and the result address in the registers read
        	// by the shared comparison body, then tail-branch to it.
   1000 	LA	ret+32(FP), R7      // R7 = address of the result word
   1001 	MOVD	s2_len+24(FP), R6   // R6 = length of the second operand
   1002 	MOVD	s2_base+16(FP), R5  // R5 = address of the second operand
   1003 	MOVD	s1_len+8(FP), R4    // R4 = length of the first operand
   1004 	MOVD	s1_base+0(FP), R3   // R3 = address of the first operand
   1005 	BR	runtimecmpbody(SB)
   1006 
   1007 TEXT bytesCompare(SB),NOSPLIT|NOFRAME,$0-56
        	// Place both operands and the result address in the registers read
        	// by the shared comparison body, then tail-branch to it.
   1008 	LA	res+48(FP), R7   // R7 = address of the result word
   1009 	MOVD	s2+32(FP), R6    // R6 = length of the second operand
   1010 	MOVD	s2+24(FP), R5    // R5 = address of the second operand
   1011 	MOVD	s1+8(FP), R4     // R4 = length of the first operand
   1012 	MOVD	s1+0(FP), R3     // R3 = address of the first operand
   1013 	BR	runtimecmpbody(SB)
   1014 
   1015 // cmpbody compares a[0:alen] with b[0:blen] lexicographically and stores
        // -1, 0 or 1 at the address in R7 for a < b, a == b and a > b. When
        // one operand is a prefix of the other, the shorter one is the lesser.
        //
        // input:
   1016 //   R3 = a
   1017 //   R4 = alen
   1018 //   R5 = b
   1019 //   R6 = blen
   1020 //   R7 = address of output word (stores -1/0/1 here)
        //
        // R3, R5 and R8 are overwritten.
   1021 TEXT runtimecmpbody(SB),NOSPLIT|NOFRAME,$0-0
        	// Identical start addresses: the common prefix is trivially equal,
        	// so only the lengths decide.
   1022 	CMPBEQ	R3, R5, cmplengths
        	// R8 = min(alen, blen): the number of bytes still to be compared.
   1023 	MOVD	R4, R8
   1024 	CMPBLE	R4, R6, amin
   1025 	MOVD	R6, R8
   1026 amin:
   1027 	CMPBEQ	R8, $0, cmplengths
   1028 	CMP	R8, $256
   1029 	BLE	tail
        	// Compare whole 256-byte blocks while more than 256 bytes remain.
   1030 loop:
   1031 	CLC	$256, 0(R3), 0(R5)
   1032 	BGT	gt
   1033 	BLT	lt
   1034 	SUB	$256, R8
        	// CLC leaves its address registers unchanged, so both pointers must
        	// be stepped past the block just compared. Without this every
        	// iteration, and the tail, would look at the first bytes again and
        	// differences beyond byte 256 would go unnoticed.
        	MOVD	$256(R3), R3       // R3+=256
        	MOVD	$256(R5), R5       // R5+=256
   1035 	CMP	R8, $256
   1036 	BGT	loop
        	// 1 <= R8 <= 256 bytes remain. EXRL executes the single CLC at
        	// cmpbodyclc with R8-1 supplying its length code.
   1037 tail:
   1038 	SUB	$1, R8
   1039 	EXRL	$runtimecmpbodyclc(SB), R8
   1040 	BGT	gt
   1041 	BLT	lt
        	// The common prefix is equal: order by length.
   1042 cmplengths:
   1043 	CMP	R4, R6
   1044 	BEQ	eq
   1045 	BLT	lt
   1046 gt:
   1047 	MOVD	$1, 0(R7)
   1048 	RET
   1049 lt:
   1050 	MOVD	$-1, 0(R7)
   1051 	RET
   1052 eq:
   1053 	MOVD	$0, 0(R7)
   1054 	RET
   1055 
   1056 TEXT runtimecmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// Template instruction named as the EXRL target in cmpbody: it
        	// compares the storage at (R3) with the storage at (R5). EXRL
        	// executes only this CLC, with the length taken from its register
        	// operand, so the instruction must stay first in this symbol.
   1057 	CLC	$1, 0(R3), 0(R5)
   1058 	RET
   1059 
   1060 // func supportsVX() bool
        // Returns the byte stored at runtimecpu+facilities_hasVX.
        // NOTE(review): the value is left in R0 on return, while other code in
        // this file treats R0 as expected to be zero -- confirm this is fine.
   1061 TEXT stringssupportsVX(SB),NOSPLIT,$0-1
   1062 	MOVBZ	runtimecpu+facilities_hasVX(SB), R0 // load the flag byte, zero-extended
   1063 	MOVB	R0, ret+0(FP)                       // store it as the one-byte result
   1064 	RET
   1065 
   1066 // func supportsVX() bool
        // Returns the byte stored at runtimecpu+facilities_hasVX.
        // NOTE(review): the value is left in R0 on return, while other code in
        // this file treats R0 as expected to be zero -- confirm this is fine.
   1067 TEXT bytessupportsVX(SB),NOSPLIT,$0-1
   1068 	MOVBZ	runtimecpu+facilities_hasVX(SB), R0 // load the flag byte, zero-extended
   1069 	MOVB	R0, ret+0(FP)                       // store it as the one-byte result
   1070 	RET
   1071 
   1072 // func indexShortStr(s, sep string) int
   1073 // The vx facility must be available; checking that is the caller's job.
        // Loads the arguments into R1-R5 and tail-branches to the shared body.
   1074 TEXT stringsindexShortStr(SB),NOSPLIT|NOFRAME,$0-40
   1075 	MOVD	$ret+32(FP), R5    // R5=&ret
   1076 	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
   1077 	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
   1078 	BR	runtimeindexShortStr(SB)
   1079 
   1080 // func indexShortStr(s, sep []byte) int
   1081 // The vx facility must be available; checking that is the caller's job.
        // Loads the arguments into R1-R5 and tail-branches to the shared body.
   1082 TEXT bytesindexShortStr(SB),NOSPLIT|NOFRAME,$0-56
   1083 	MOVD	$ret+48(FP), R5    // R5=&ret
   1084 	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
   1085 	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
   1086 	BR	runtimeindexShortStr(SB)
   1087 
   1088 // s: string we are searching
   1089 // sep: string to search for
   1090 // R1=&s[0], R2=len(s)
   1091 // R3=&sep[0], R4=len(sep)
   1092 // R5=&ret (int)
   1093 // Caller must confirm availability of vx facility before calling.
        //
        // Stores through R5 the offset in s of the first occurrence of sep, or
        // -1 when there is none. An empty sep, or one longer than s, also
        // produces -1. Separators of 65 bytes or more are not implemented (see
        // index65plus). The search strategy is selected by len(sep).
        //
        // R0, R2, R4, R7, R8, R9 and a number of vector registers are
        // overwritten; R0 is not reset before returning.
   1094 TEXT runtimeindexShortStr(SB),NOSPLIT|NOFRAME,$0
   1095 	CMPBGT	R4, R2, notfound
   1096 	ADD	R1, R2
   1097 	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
   1098 	CMPBEQ	R4, $0, notfound
   1099 	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
   1100 	VLL	R4, (R3), V0 // contains first 16 bytes of sep
   1101 	MOVD	R1, R7 // R7 = candidate position in s, advanced by the loops below
        // len(sep) == 2: test 16 candidate positions per iteration.
   1102 index2plus:
   1103 	CMPBNE	R4, $1, index3plus
   1104 	MOVD	$15(R7), R9
   1105 	CMPBGE	R9, R2, index2to16 // too close to the last valid position for a 16-wide step
   1106 	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
   1107 	VONE	V16
   1108 	VREPH	$0, V0, V1
   1109 	CMPBGE	R9, R2, index2to16 // repeats the check above; R9 and R2 have not changed since
   1110 index2loop:
   1111 	VL	0(R7), V2          // 16 bytes, even indices
   1112 	VL	1(R7), V4          // 16 bytes, odd indices
   1113 	VCEQH	V1, V2, V5         // compare even indices
   1114 	VCEQH	V1, V4, V6         // compare odd indices
   1115 	VSEL	V5, V6, V31, V7    // merge even and odd indices
   1116 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1117 	BLT	foundV17
   1118 	MOVD	$16(R7), R7        // R7+=16
   1119 	ADD	$15, R7, R9
   1120 	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
   1121 	CMPBLE	R7, R2, index2to16 // remaining positions are checked one at a time
   1122 	BR	notfound
   1123 
        // len(sep) == 3: as above, with the third byte checked separately.
   1124 index3plus:
   1125 	CMPBNE	R4, $2, index4plus
   1126 	ADD	$15, R7, R9
   1127 	CMPBGE	R9, R2, index2to16
   1128 	MOVD	$1, R0             // VLL index for the 2 extra bytes loaded in the loop
   1129 	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
   1130 	VONE	V16
   1131 	VREPH	$0, V0, V1
   1132 	VREPB	$2, V0, V8
   1133 index3loop:
   1134 	VL	(R7), V2           // load 16-bytes into V2
   1135 	VLL	R0, 16(R7), V3     // load 2-bytes into V3
   1136 	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
   1137 	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   1138 	VCEQH	V1, V2, V5         // compare 2-byte even indices
   1139 	VCEQH	V1, V4, V6         // compare 2-byte odd indices
   1140 	VCEQB	V8, V9, V10        // compare last bytes
   1141 	VSEL	V5, V6, V31, V7    // merge even and odd indices
   1142 	VN	V7, V10, V7        // AND indices with last byte
   1143 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1144 	BLT	foundV17
   1145 	MOVD	$16(R7), R7        // R7+=16
   1146 	ADD	$15, R7, R9
   1147 	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
   1148 	CMPBLE	R7, R2, index2to16 // remaining positions are checked one at a time
   1149 	BR	notfound
   1150 
        // len(sep) == 4: compare whole 4-byte words at the four byte offsets
        // and merge the results into one byte per candidate position.
   1151 index4plus:
   1152 	CMPBNE	R4, $3, index5plus
   1153 	ADD	$15, R7, R9
   1154 	CMPBGE	R9, R2, index2to16
   1155 	MOVD	$2, R0             // VLL index for the 3 extra bytes loaded in the loop
   1156 	VGBM	$0x8888, V29       // 0xff000000ff000000...
   1157 	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
   1158 	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
   1159 	VONE	V16
   1160 	VREPF	$0, V0, V1
   1161 index4loop:
   1162 	VL	(R7), V2           // load 16-bytes into V2
   1163 	VLL	R0, 16(R7), V3     // load 3-bytes into V3
   1164 	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
   1165 	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   1166 	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
   1167 	VCEQF	V1, V2, V5         // compare index 0, 4, ...
   1168 	VCEQF	V1, V4, V6         // compare index 1, 5, ...
   1169 	VCEQF	V1, V9, V11        // compare index 2, 6, ...
   1170 	VCEQF	V1, V10, V12       // compare index 3, 7, ...
   1171 	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
   1172 	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
   1173 	VSEL	V13, V14, V31, V7  // final merge
   1174 	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   1175 	BLT	foundV17
   1176 	MOVD	$16(R7), R7        // R7+=16
   1177 	ADD	$15, R7, R9
   1178 	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
   1179 	CMPBLE	R7, R2, index2to16 // remaining positions are checked one at a time
   1180 	BR	notfound
   1181 
   1182 index5plus:
   1183 	CMPBGT	R4, $15, index17plus
        // One candidate position per comparison, for len(sep) <= 16. Reached by
        // fall-through for lengths not handled above (including len(sep) == 1)
        // and from the 2-, 3- and 4-byte paths for the positions they do not
        // cover with 16-wide steps.
   1184 index2to16:
   1185 	CMPBGT	R7, R2, notfound
   1186 	MOVD	$1(R7), R8
   1187 	CMPBGT	R8, R2, index2to16tail // only one position left
   1188 index2to16loop:
   1189 	// unrolled 2x
   1190 	VLL	R4, (R7), V1       // len(sep) bytes at R7
   1191 	VLL	R4, 1(R7), V2      // len(sep) bytes at R7+1
   1192 	VCEQGS	V0, V1, V3
   1193 	BEQ	found
   1194 	MOVD	$1(R7), R7         // R7 now names the position loaded into V2
   1195 	VCEQGS	V0, V2, V4
   1196 	BEQ	found
   1197 	MOVD	$1(R7), R7
   1198 	CMPBLT	R7, R2, index2to16loop // at least two positions left
   1199 	CMPBGT	R7, R2, notfound
        // Exactly one position (R7 == R2) is left to check.
   1200 index2to16tail:
   1201 	VLL	R4, (R7), V1
   1202 	VCEQGS	V0, V1, V2
   1203 	BEQ	found
   1204 	BR	notfound
   1205 
        // 17 <= len(sep) <= 32: V0 holds the first 16 bytes of sep, V1 the rest.
        // Each iteration compares both parts at one position and then steps
        // R7 by one.
   1206 index17plus:
   1207 	CMPBGT	R4, $31, index33plus
   1208 	SUB	$16, R4, R0        // R0 = VLL index for the bytes after the first 16
   1209 	VLL	R0, 16(R3), V1
   1210 	VONE	V7
   1211 index17to32loop:
   1212 	VL	(R7), V2
   1213 	VLL	R0, 16(R7), V3
   1214 	VCEQG	V0, V2, V4
   1215 	VCEQG	V1, V3, V5
   1216 	VN	V4, V5, V6         // combine the per-part comparison results
   1217 	VCEQGS	V6, V7, V8         // match if the combined result is all ones
   1218 	BEQ	found
   1219 	MOVD	$1(R7), R7
   1220 	CMPBLE  R7, R2, index17to32loop
   1221 	BR	notfound
   1222 
        // 33 <= len(sep) <= 48: same scheme with three parts (V0, V1, V2).
   1223 index33plus:
   1224 	CMPBGT	R4, $47, index49plus
   1225 	SUB	$32, R4, R0        // R0 = VLL index for the bytes after the first 32
   1226 	VL	16(R3), V1
   1227 	VLL	R0, 32(R3), V2
   1228 	VONE	V11
   1229 index33to48loop:
   1230 	VL	(R7), V3
   1231 	VL	16(R7), V4
   1232 	VLL	R0, 32(R7), V5
   1233 	VCEQG	V0, V3, V6
   1234 	VCEQG	V1, V4, V7
   1235 	VCEQG	V2, V5, V8
   1236 	VN	V6, V7, V9
   1237 	VN	V8, V9, V10
   1238 	VCEQGS	V10, V11, V12
   1239 	BEQ	found
   1240 	MOVD	$1(R7), R7
   1241 	CMPBLE  R7, R2, index33to48loop
   1242 	BR	notfound
   1243 
        // 49 <= len(sep) <= 64: same scheme with four parts (V0-V3).
   1244 index49plus:
   1245 	CMPBGT	R4, $63, index65plus
   1246 	SUB	$48, R4, R0        // R0 = VLL index for the bytes after the first 48
   1247 	VL	16(R3), V1
   1248 	VL	32(R3), V2
   1249 	VLL	R0, 48(R3), V3
   1250 	VONE	V15
   1251 index49to64loop:
   1252 	VL	(R7), V4
   1253 	VL	16(R7), V5
   1254 	VL	32(R7), V6
   1255 	VLL	R0, 48(R7), V7
   1256 	VCEQG	V0, V4, V8
   1257 	VCEQG	V1, V5, V9
   1258 	VCEQG	V2, V6, V10
   1259 	VCEQG	V3, V7, V11
   1260 	VN	V8, V9, V12
   1261 	VN	V10, V11, V13
   1262 	VN	V12, V13, V14
   1263 	VCEQGS	V14, V15, V16
   1264 	BEQ	found
   1265 	MOVD	$1(R7), R7
   1266 	CMPBLE  R7, R2, index49to64loop
        	// falls through to notfound once every position has been tried
   1267 notfound:
   1268 	MOVD	$-1, (R5)
   1269 	RET
   1270 
   1271 index65plus:
   1272 	// not implemented
        	// NOTE(review): R0 is not set on this path; the store below looks
        	// like a deliberate fault for unsupported lengths -- confirm.
   1273 	MOVD	$0, (R0)
   1274 	RET
   1275 
   1276 foundV17: // index is in doubleword V17[0]
   1277 	VLGVG	$0, V17, R8
   1278 	ADD	R8, R7             // R7 = address of the match
   1279 found:
   1280 	SUB	R1, R7             // convert the match address into an offset from &s[0]
   1281 	MOVD	R7, (R5)
   1282 	RET
   1283 
   1284 // This is called from .init_array and follows the platform, not Go, ABI.
   1285 // We are overly conservative. We could only save the registers we use.
   1286 // However, since this function is only called once per loaded module
   1287 // performance is unimportant.
        // The argument arrives in R2 and is linked in after the current last
        // moduledata, then becomes the new last one. R1 is used as scratch.
   1288 TEXT runtimeaddmoduledata(SB),NOSPLIT|NOFRAME,$0-0
   1289 	// Save R6-R15 in the register save area of the calling function.
   1290 	// Don't bother saving F8-F15 as we aren't doing any calls.
   1291 	STMG	R6, R15, 48(R15)
   1292 
   1293 	// Append the argument (passed in R2, as per the ELF ABI) to the
   1294 	// moduledata linked list.
   1295 	MOVD	runtimelastmoduledatap(SB), R1 // R1 = current tail of the list
   1296 	MOVD	R2, moduledata_next(R1)        // tail.next = argument
   1297 	MOVD	R2, runtimelastmoduledatap(SB) // the argument is the new tail
   1298 
   1299 	// Restore R6-R15.
   1300 	LMG	48(R15), R6, R15
   1301 	RET
   1302 
   1303 TEXT checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 into the one-byte result and returns.
   1304 	MOVB	$1, ret+0(FP)
   1305 	RET
   1306