Home | History | Annotate | Download | only in emultempl
      1 /* Overlay manager for SPU.
      2 
      3    Copyright (C) 2006-2014 Free Software Foundation, Inc.
      4 
      5    This file is part of the GNU Binutils.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
     20    MA 02110-1301, USA.  */
     21 
     22 /* MFC DMA definitions.  */
/* Command value written to $MFC_Cmd to queue each overlay transfer.  */
     23 #define MFC_GET_CMD		0x40
/* Cap on the size of a single transfer; __ovly_xfer_loop splits larger
   overlays into chunks of at most this many bytes.  */
     24 #define MFC_MAX_DMA_SIZE	0x4000
/* Value written to $MFC_WrTagUpdate before reading $MFC_RdTagStat.  */
     25 #define MFC_TAG_UPDATE_ALL	2
/* Tag written to $MFC_TagId for the overlay transfers; also used to
   build the tag mask (1 << MFC_TAG_ID) while waiting for them.  */
     26 #define MFC_TAG_ID		0
     27 
     28 /* Register usage.
   Every name below is an alias for one of nine hardware registers.  A
   register gets one alias per role it plays at different points in the
   code, so two aliases of the same register must not be live together.

   reserved1-reserved5 ($75-$79) are used without being saved.
   NOTE(review): presumably reserved for overlay stubs by the toolchain
   -- confirm against the SPU ABI.
   save1-save3 ($72-$74) are spilled below $sp before being used as
   temporaries and reloaded before branching to the target.  save4 ($71)
   is only spilled and used when OVLY_IRQ_SAVE is defined.  */
     29 #define reserved1	$75
/* parm shares $75: it is the stub's argument on entry to __ovly_load.  */
     30 #define parm		$75
     31 #define tab1		reserved1
     32 #define tab2		reserved1
     33 #define vma		reserved1
     34 #define oldvma		reserved1
     35 #define newmask		reserved1
     36 #define map		reserved1
     37 
     38 #define reserved2	$76
     39 #define off1		reserved2
     40 #define off2		reserved2
     41 #define present1	reserved2
     42 #define present2	reserved2
     43 #define sz		reserved2
     44 #define cmp		reserved2
     45 #define add64		reserved2
     46 #define cgbits		reserved2
     47 #define off3		reserved2
     48 #define off4		reserved2
     49 #define addr4		reserved2
     50 #define off5		reserved2
     51 #define tagstat		reserved2
     52 
     53 #define reserved3	$77
     54 #define size1		reserved3
     55 #define size2		reserved3
     56 #define rv3		reserved3
     57 #define ealo		reserved3
     58 #define cmd		reserved3
     59 #define off64		reserved3
     60 #define tab3		reserved3
     61 #define tab4		reserved3
     62 #define tab5		reserved3
     63 
     64 #define reserved4	$78
     65 #define ovl		reserved4
     66 #define rv2		reserved4
     67 #define rv5		reserved4
     68 #define cgshuf		reserved4
     69 #define newovl		reserved4
     70 #define irqtmp1		reserved4
     71 #define irqtmp2		reserved4
     72 
/* target holds the final branch address and keeps that single role from
   entry until the closing "bi target".  */
     73 #define reserved5	$79
     74 #define target		reserved5
     75 
     76 #define save1		$74
     77 #define rv4		save1
     78 #define rv7		save1
     79 #define tagid		save1
     80 #define maxsize		save1
     81 #define pbyte		save1
     82 #define pbit		save1
     83 
     84 #define save2		$73
     85 #define cur		save2
     86 #define rv6		save2
     87 #define osize		save2
     88 #define zovl		save2
     89 #define oldovl		save2
     90 #define newvma		save2
     91 
     92 #define save3		$72
     93 #define rv1		save3
     94 #define ea64		save3
     95 #define buf3		save3
     96 #define genwi		save3
     97 #define newmap		save3
     98 #define oldmask		save3
     99 
/* irq_stat keeps the machine status read on entry to do_load so the
   interrupt-enable state can be restored on exit (OVLY_IRQ_SAVE only).  */
    100 #define save4		$71
    101 #define irq_stat	save4
    102 
/* Constant shuffle patterns and the current-overlay variable.  These
   objects are emitted in .text alongside the code.  */
    103 	.text
    104 	.align 	4
/* shufb control used by __ovly_load to build the new $lr: result word 0
   is word 0 of the first source, result word 1 is word 0 of the second
   source, and the 0x80 control bytes zero-fill words 2 and 3.  */
    105 	.type	__rv_pattern, @object
    106 	.size	__rv_pattern, 16
    107 __rv_pattern:
    108 	.word	0x00010203, 0x10111213, 0x80808080, 0x80808080
    109 
/* shufb control used by __ovly_xfer_loop: moves word 1 of the source
   into word 0 and zero-fills the rest, so the carry out of the low
   address word can be added into the high address word.  */
    110 	.type	__cg_pattern, @object
    111 	.size	__cg_pattern, 16
    112 __cg_pattern:
    113 	.word	0x04050607, 0x80808080, 0x80808080, 0x80808080
    114 
/* One quadword holding the current overlay index.  It is written with
   stqr from the register holding the index; readers use word 0 only.  */
    115 	.type	__ovly_current, @object
    116 	.size	__ovly_current, 16
    117 __ovly_current:
    118 	.space	16
    119 
    120 /*
    121  * __ovly_return - stub for returning from overlay functions.
    122  *
    123  * On entry the four slots of $lr are:
    124  *   __ovly_return, prev ovl index, caller return addr, undefined.
    125  *
    126  * Load the previous overlay and jump to the caller return address.
    127  * Updates __ovly_current.
    128  */
/* NOTE(review): the trailing "# p,l  c" annotations look like issue
   pipe, latency and issue cycle, and the commented-out #nop/#lnop lines
   like empty issue slots -- confirm before rescheduling anything.  */
    129 	.align 	4
    130 	.global	__ovly_return
    131 	.type	__ovly_return, @function
    132 __ovly_return:
/* Table base biased by one 16-byte entry, so index * 16 can be used as
   the offset directly.  */
    133 	ila	tab1, _ovly_table - 16				# 0,2	0
/* Bring word 1 of $lr (previous overlay index) into word 0.  */
    134 	shlqbyi	ovl, $lr, 4					# 1,4	0
    135 #nop
/* Bring word 2 of $lr (caller return address) into word 0.  */
    136 	shlqbyi	target, $lr, 8					# 1,4	1
    137 #nop; lnop
    138 #nop; lnop
/* off1 = index * 16, the byte offset of the table entry.  */
    139 	shli	off1, ovl, 4					# 0,4	4
    140 #lnop
    141 #nop
/* Hint the "bi target" at ovly_ret9.  */
    142 	hbr	ovly_ret9, target				# 1,15	5
    143 #nop; lnop
    144 #nop; lnop
    145 #nop
/* Load the whole table entry { vma, size, file_offset, buf }.  */
    146 	lqx	vma, tab1, off1					# 1,6	8
/* Spill the save registers now: do_load uses them as temporaries and
   reloads them from these slots.  This routine itself does not modify
   them, so the fast path needs no reload.  */
    147 #ifdef OVLY_IRQ_SAVE
    148 	nop
    149 	stqd	save4, -64($sp)					# 1,6	9
    150 #else
    151 #nop; lnop
    152 #endif
    153 #nop; lnop
    154 #nop; lnop
    155 #nop; lnop
    156 #nop; lnop
    157 #nop
/* Bring the .size word into word 0.  */
    158 	rotqbyi	size1, vma, 4					# 1,4	14
    159 #nop
    160 	stqd	save3, -48($sp)					# 1,6	15
    161 #nop
    162 	stqd	save2, -32($sp)					# 1,6	16
    163 #nop
    164 	stqd	save1, -16($sp)					# 1,6	17
/* The low bit of .size is the "overlay is loaded" flag.  */
    165 	andi	present1, size1, 1				# 0,2	18
/* The previous overlay becomes the current one again.  */
    166 	stqr	ovl, __ovly_current				# 1,6	18
    167 #nop; lnop
    168 #nop
/* Not resident: go and load it; do_load ends with its own "bi target".  */
    169 	brz	present1, do_load				# 1,4	20
    170 ovly_ret9:
    171 #nop
    172 	bi	target						# 1,4	21
    173 
    174 /*
    175  * __ovly_load - copy an overlay partition to local store.
    176  *
    177  * On entry $75 points to a word consisting of the overlay index in
    178  * the top 14 bits, and the target address in the bottom 18 bits.
    179  *
    180  * Sets up $lr to return via __ovly_return.  If $lr is already set
    181  * to return via __ovly_return, don't change it.  In that case we
    182  * have a tail call from one overlay function to another.
    183  * Updates __ovly_current.
    184  */
    185 	.align  3
    186 	.global	__ovly_load
    187 	.type	__ovly_load, @function
    188 __ovly_load:
/* 8-byte stubs: parm points at a packed word (overlay index in the top
   14 bits, target address in the low 18 bits) that is decoded here.  */
    189 #if OVL_STUB_SIZE == 8
    190 ########
    191 #nop
/* Load the quadword containing the packed word ...  */
    192 	lqd	target, 0(parm)					# 1,6	-11
    193 #nop; lnop
    194 #nop; lnop
    195 #nop; lnop
    196 #nop; lnop
    197 #nop; lnop
    198 #nop
/* ... and rotate by the low bits of the address so it lands in word 0.  */
    199 	rotqby	target, target, parm				# 1,4	-5
    200 	ila	tab2, _ovly_table - 16				# 0,2	-4
/* Spill save1-save3; they are used as temporaries below.  */
    201 	stqd	save3, -48($sp)					# 1,6	-4
    202 #nop
    203 	stqd	save2, -32($sp)					# 1,6	-3
    204 #nop
    205 	stqd	save1, -16($sp)					# 1,6	-2
/* Logical shift right by 18 leaves the 14-bit overlay index.  */
    206 	rotmi	ovl, target, -18				# 0,4	-1
    207 	hbr	ovly_load9, target				# 1,15	-1
    208 	ila	rv1, __ovly_return				# 0,2	0
    209 #lnop
    210 #nop; lnop
    211 #nop
/* Read the old current overlay before overwriting it with the new one.  */
    212 	lqr	cur, __ovly_current				# 1,6	2
    213 	shli	off2, ovl, 4					# 0,4	3
    214 	stqr	ovl, __ovly_current				# 1,6	3
/* rv2 word 0 is all ones when $lr already points at __ovly_return.  */
    215 	ceq	rv2, $lr, rv1					# 0,2	4
    216 	lqr	rv3, __rv_pattern				# 1,6	4
    217 #nop; lnop
    218 #nop; lnop
    219 #nop
    220 	lqx	vma, tab2, off2					# 1,6	7
    221 ########
    222 #else /* OVL_STUB_SIZE == 16 */
    223 ########
/* NOTE(review): this variant reads ovl ($78) and target ($79) without
   setting them; presumably the 16-byte stub loads both before branching
   here -- confirm against the stub generator.  Apart from that it is
   the same sequence as above in a different schedule.  */
    224 	ila	tab2, _ovly_table - 16				# 0,2	0
    225 	stqd	save3, -48($sp)					# 1,6	0
    226 	ila	rv1, __ovly_return				# 0,2	1
    227 	stqd	save2, -32($sp)					# 1,6	1
    228 	shli	off2, ovl, 4					# 0,4	2
    229 	lqr	cur, __ovly_current				# 1,6	2
    230 	nop
    231 	stqr	ovl, __ovly_current				# 1,6	3
    232 	ceq	rv2, $lr, rv1					# 0,2	4
    233 	lqr	rv3, __rv_pattern				# 1,6	4
    234 #nop
    235 	hbr	ovly_load9, target				# 1,15	5
    236 #nop
    237 	lqx	vma, tab2, off2					# 1,6	6
    238 #nop
    239 	stqd	save1, -16($sp)					# 1,6	7
    240 ########
    241 #endif
    242 
    243 #nop; lnop
    244 #nop; lnop
    245 #nop
/* Build the replacement link register in rv7:
     word 0 = __ovly_return, word 1 = old current overlay index,
     word 2 = original return address, word 3 = original $lr word 1.
   rv4 supplies words 0-1 (words 2-3 zero), rv6 supplies words 2-3.  */
    246 	shufb	rv4, rv1, cur, rv3				# 1,4	10
    247 #nop
/* Expand the compare result into a 16-byte select mask: all ones when
   $lr already returns via __ovly_return, otherwise all zero.  */
    248 	fsmb	rv5, rv2					# 1,4	11
    249 #nop
/* Shift $lr right by 8 bytes: words 0-1 move to words 2-3.  */
    250 	rotqmbyi rv6, $lr, -8					# 1,4	12
    251 #nop
    252 	rotqbyi	size2, vma, 4					# 1,4	13
    253 #nop
/* Start reloading the spilled registers as each temporary dies.  */
    254 	lqd	save3, -48($sp)					# 1,6	14
    255 #nop; lnop
    256 	or	rv7, rv4, rv6					# 0,2	16
    257 	lqd	save2, -32($sp)					# 1,6	16
/* Low bit of .size is the "overlay is loaded" flag.  */
    258 	andi	present2, size2, 1				# 0,2	17
/* save4 is not used here; it is spilled for the benefit of do_load.  */
    259 #ifdef OVLY_IRQ_SAVE
    260 	stqd	save4, -64($sp)					# 1,6	17
    261 #else
    262 	lnop							# 1,0	17
    263 #endif
/* Keep $lr where the mask is set (tail call between overlay functions),
   otherwise install the value built in rv7.  */
    264 	selb	$lr, rv7, $lr, rv5				# 0,2	18
    265 	lqd	save1, -16($sp)					# 1,6	18
    266 #nop
/* Not resident: load it first.  The stack copies of save1-save3 are
   still valid for do_load to reload from.  */
    267 	brz	present2, do_load				# 1,4	19
    268 ovly_load9:
    269 #nop
    270 	bi	target						# 1,4	20
    271 
    272 /* If we get here, we are about to load a new overlay.
    273  * "vma" contains the relevant entry from _ovly_table[].
    274  *	extern struct {
    275  *		u32 vma;
    276  *		u32 size;
    277  *		u32 file_offset;
    278  *		u32 buf;
    279  *	} _ovly_table[];
    280  */
/* Entry to the slow path.  Expects vma to hold the table entry of the
   overlay to load, target the address to branch to afterwards, and
   save1-save3 (plus save4 with OVLY_IRQ_SAVE) already spilled below $sp.  */
    281 	.align  3
    282 	.global	__ovly_load_event
    283 	.type	__ovly_load_event, @function
    284 __ovly_load_event:
    285 do_load:
    286 #ifdef OVLY_IRQ_SAVE
/* Record the machine status, then disable interrupts by taking a "bid"
   branch to the very next instruction.  */
    287 	ila	irqtmp1, do_load10				# 0,2	-5
/* sz = word 2 of the entry (file_offset); see the #else arm.  */
    288 	rotqbyi	sz, vma, 8					# 1,4	-5
    289 #nop
    290 	rdch	irq_stat, $SPU_RdMachStat			# 1,6	-4
    291 #nop
    292 	bid	irqtmp1						# 1,4	-3
    293 do_load10:
    294 	nop
    295 #else
    296 #nop
/* sz = word 2 of the entry (file_offset).  The transfer loop adds sz to
   the effective address at the top of every iteration, so seeding it
   with the file offset positions the first transfer.  */
    297 	rotqbyi	sz, vma, 8					# 1,4	0
    298 #endif
/* osize = word 1 of the entry (.size) = bytes still to transfer.  The
   present bit is known to be clear on this path.  */
    299 	rotqbyi	osize, vma, 4					# 1,4	1
    300 #nop
/* 64-bit effective address base in words 0 (high) and 1 (low).
   NOTE(review): _EAR_ is defined elsewhere; presumably the address of
   the overlay image in main storage -- confirm.  */
    301 	lqa	ea64, _EAR_					# 1,6	2
    302 #nop
    303 	lqr	cgshuf, __cg_pattern				# 1,6	3
    304 
    305 /* We could predict the branch at the end of this loop by adding a few
    306    instructions, and there are plenty of free cycles to do so without
    307    impacting loop execution time.  However, it doesn't make a great
    308    deal of sense since we need to wait for the dma to complete anyway.  */
/* Queue one DMA per iteration until osize reaches zero.
   On entry to each iteration sz is the amount by which the effective
   address must advance: the file offset on the first pass, the size of
   the chunk just queued on later passes.  vma word 0 is the local store
   destination and advances by the same chunk size.  */
    309 __ovly_xfer_loop:
    310 #nop
/* off64 = { 0, sz } in words 0-1, i.e. sz as a 64-bit quantity.  */
    311 	rotqmbyi off64, sz, -4					# 1,4	4
    312 #nop; lnop
    313 #nop; lnop
    314 #nop; lnop
/* Per-word carry out of ea64 + off64 ...  */
    315 	cg	cgbits, ea64, off64				# 0,2	8
    316 #lnop
    317 #nop; lnop
    318 #nop
/* ... with the low word's carry moved into the high word's position.  */
    319 	shufb	add64, cgbits, cgbits, cgshuf			# 1,4	10
    320 #nop; lnop
    321 #nop; lnop
    322 #nop; lnop
/* add64 = ea64 + off64 + carry-in: a full 64-bit add across words 0-1.  */
    323 	addx	add64, ea64, off64				# 0,2	14
    324 #lnop
    325 	ila	maxsize, MFC_MAX_DMA_SIZE			# 0,2	15
    326 	lnop
/* Commit the new address and extract its low word for $MFC_EAL.  */
    327 	ori	ea64, add64, 0					# 0,2	16
    328 	rotqbyi	ealo, add64, 4					# 1,4	16
    329 	cgt	cmp, osize, maxsize				# 0,2	17
    330 	wrch	$MFC_LSA, vma					# 1,6	17
    331 #nop; lnop
/* sz = min (osize, MFC_MAX_DMA_SIZE): the size of this chunk.  */
    332 	selb	sz, osize, maxsize, cmp				# 0,2	19
    333 	wrch	$MFC_EAH, ea64					# 1,6	19
    334 	ila	tagid, MFC_TAG_ID				# 0,2	20
    335 	wrch	$MFC_EAL, ealo					# 1,6	20
    336 	ila	cmd, MFC_GET_CMD				# 0,2	21
    337 	wrch	$MFC_Size, sz					# 1,6	21
/* osize -= sz (sf computes second source minus first).  */
    338 	sf	osize, sz, osize				# 0,2	22
    339 	wrch	$MFC_TagId, tagid				# 1,6	22
/* Advance the local store destination for the next chunk.  */
    340 	a	vma, vma, sz					# 0,2	23
/* Writing the command channel queues the transfer.  */
    341 	wrch	$MFC_Cmd, cmd					# 1,6	23
    342 #nop
    343 	brnz	osize, __ovly_xfer_loop				# 1,4	24
    344 
    345 /* Now update our data structures while waiting for DMA to complete.
    346    Low bit of .size needs to be cleared on the _ovly_table entry
    347    corresponding to the evicted overlay, and set on the entry for the
    348    newly loaded overlay.  Note that no overlay may in fact be evicted
    349    as _ovly_buf_table[] starts with all zeros.  Don't zap .size entry
    350    for zero index!  Also of course update the _ovly_buf_table entry.  */
/* Mark the new overlay present in _ovly_table, then record it in
   _ovly_buf_table and fetch the index it replaces (oldovl).  vma was
   advanced by the transfer loop, so the entry is reloaded here.  */
    351 #nop
    352 	lqr	newovl, __ovly_current				# 1,6	25
    353 #nop; lnop
    354 #nop; lnop
    355 #nop; lnop
    356 #nop; lnop
    357 #nop; lnop
    358 	shli	off3, newovl, 4					# 0,4	31
    359 #lnop
    360 	ila	tab3, _ovly_table - 16				# 0,2	32
    361 #lnop
    362 #nop
/* pbyte = 0xff in byte 7 only, the low byte of word 1 ...  */
    363 	fsmbi	pbyte, 0x100					# 1,4	33
    364 #nop; lnop
    365 #nop
    366 	lqx	vma, tab3, off3					# 1,6	35
    367 #nop; lnop
/* ... so pbit = { 0, 1, 0, 0 }: the present bit of the .size word.  */
    368 	andi	pbit, pbyte, 1					# 0,2	37
    369 	lnop
    370 #nop; lnop
    371 #nop; lnop
    372 #nop; lnop
/* Set the present bit in the new overlay's entry.  */
    373 	or	newvma, vma, pbit				# 0,2	41
/* buf3 word 0 = .buf, the buffer number of this overlay.  */
    374 	rotqbyi	buf3, vma, 12					# 1,4	41
    375 #nop; lnop
    376 #nop
    377 	stqx	newvma, tab3, off3				# 1,6	43
    378 #nop; lnop
/* _ovly_buf_table has 4-byte entries; the base is biased by one entry,
   so buf * 4 is the offset of this buffer's slot.  */
    379 	shli	off4, buf3, 2					# 1,4	45
    380 #lnop
    381 	ila	tab4, _ovly_buf_table - 4			# 0,2	46
    382 #lnop
    383 #nop; lnop
    384 #nop; lnop
    385 #nop
/* Load the quadword containing the slot.  */
    386 	lqx	map, tab4, off4					# 1,6	49
    387 #nop
/* Shuffle control that inserts a word at the slot's position.  */
    388 	cwx	genwi, tab4, off4				# 1,4	50
    389 	a	addr4, tab4, off4				# 0,2	51
    390 #lnop
    391 #nop; lnop
    392 #nop; lnop
    393 #nop; lnop
    394 #nop
/* oldovl word 0 = overlay previously recorded for this buffer (0 if
   none).  */
    395 	rotqby	oldovl, map, addr4				# 1,4	55
    396 #nop
/* Replace the slot with the new overlay index, keeping its neighbours.  */
    397 	shufb	newmap, newovl, map, genwi			# 0,4	56
/* Tag mask selecting only the overlay manager's tag group; ila cannot
   reach bits 16 and up, hence the ilhu variant.  */
    398 #if MFC_TAG_ID < 16
    399 	ila	newmask, 1 << MFC_TAG_ID			# 0,2	57
    400 #else
    401 	ilhu	newmask, 1 << (MFC_TAG_ID - 16)			# 0,2	57
    402 #endif
    403 #lnop
    404 #nop; lnop
    405 #nop; lnop
    406 	stqd	newmap, 0(addr4)				# 1,6	60
    407 
    408 /* Save app's tagmask, wait for DMA complete, restore mask.  */
    409 	ila	tagstat, MFC_TAG_UPDATE_ALL			# 0,2	61
    410 	rdch	oldmask, $MFC_RdTagMask				# 1,6	61
    411 #nop
    412 	wrch	$MFC_WrTagMask, newmask				# 1,6	62
    413 #nop
    414 	wrch	$MFC_WrTagUpdate, tagstat			# 1,6	63
    415 #nop
/* This channel read is the actual wait for the queued transfers.  */
    416 	rdch	tagstat, $MFC_RdTagStat				# 1,6	64
    417 #nop
/* NOTE(review): presumably needed so the freshly transferred code is
   what gets fetched after the branch -- confirm.  */
    418 	sync							# 1,4	65
    419 /* Any hint prior to the sync is lost.  A hint here allows the branch
    420    to complete 15 cycles after the hint.  With no hint the branch will
    421    take 18 or 19 cycles.  */
    422 	ila	tab5, _ovly_table - 16				# 0,2	66
    423 	hbr	do_load99, target				# 1,15	66
/* Offset of the evicted overlay's table entry.  */
    424 	shli	off5, oldovl, 4					# 0,4	67
    425 	wrch	$MFC_WrTagMask, oldmask				# 1,6	67
/* zovl = all ones in every word when nothing was evicted (index 0).  */
    426 	ceqi	zovl, oldovl, 0					# 0,2	68
    427 #lnop
    428 #nop; lnop
    429 #nop
    430 	fsm	zovl, zovl					# 1,4	70
    431 #nop
    432 	lqx	oldvma, tab5, off5				# 1,6	71
    433 #nop
/* From here on each save register is reloaded right after the last use
   of the temporary aliased to it.  */
    434 	lqd	save3, -48($sp)					# 1,6	72
    435 #nop; lnop
/* Drop the present bit from the mask when the old index is zero, so
   the store below rewrites that quadword unchanged.  */
    436 	andc	pbit, pbit, zovl				# 0,2	74
    437 	lqd	save2, -32($sp)					# 1,6	74
    438 #ifdef OVLY_IRQ_SAVE
    439 	ila	irqtmp2, do_load90				# 0,2	75
    440 #lnop
/* Keep only bit 0 of the saved machine status; it is tested below to
   decide whether interrupts are re-enabled.  */
    441 	andi	irq_stat, irq_stat, 1				# 0,2	76
    442 #lnop
    443 #else
    444 #nop; lnop
    445 #nop; lnop
    446 #endif
/* Clear the present bit of the evicted overlay's .size word.  */
    447 	andc	oldvma, oldvma, pbit				# 0,2	77
    448 	lqd	save1, -16($sp)					# 1,6	77
    449 	nop	       						# 0,0	78
    450 #lnop
    451 #nop
    452 	stqx	oldvma, tab5, off5				# 1,6	79
    453 #nop
    454 #ifdef OVLY_IRQ_SAVE
/* Both outcomes continue at do_load90: the taken branch re-enables
   interrupts, falling through leaves them disabled.  */
    455 	binze	irq_stat, irqtmp2				# 1,4	80
    456 do_load90:
    457 #nop
    458 	lqd	save4, -64($sp)					# 1,6	84
    459 #else
    460 #nop; lnop
    461 #endif
    462 
/* Global label on a nop, reached once the overlay is resident and the
   tables are updated.  NOTE(review): presumably a breakpoint location
   for debuggers -- confirm.  */
    463 	.global	_ovly_debug_event
    464 	.type	_ovly_debug_event, @function
    465 _ovly_debug_event:
    466 	nop
    467 /* Branch to target address. */
    468 do_load99:
    469 	bi	target						# 1,4	81/85
    470 
/* The recorded size of __ovly_load spans everything from its label to
   here, including the do_load slow path.  */
    471 	.size	__ovly_load, . - __ovly_load
    472