/* Overlay manager for SPU.

   Copyright (C) 2006-2014 Free Software Foundation, Inc.

   This file is part of the GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* MFC DMA defn's.  */
#define MFC_GET_CMD		0x40
#define MFC_MAX_DMA_SIZE	0x4000
#define MFC_TAG_UPDATE_ALL	2
#define MFC_TAG_ID		0

/* Register usage.

   Every name below is a preprocessor alias for one of nine physical
   registers ($71-$79).  Several aliases share one register: each alias
   names the value that register holds during one phase of the code, so
   two aliases of the same register must never be live at the same time.
   Check this table before reordering or adding instructions.

   $75-$79 (reserved1 .. reserved5) are used without being saved.
   $72-$74 (save3 .. save1) are spilled below $sp (save1 at -16, save2 at
   -32, save3 at -48) before they are modified and reloaded before
   control passes to the target.  $71 (save4) is only touched when
   OVLY_IRQ_SAVE is defined, where it is spilled at -64($sp).

   NOTE(review): the trailing "# p,l  c" annotations on instructions
   appear to record issue pipeline (0 even, 1 odd), latency and issue
   cycle, and the commented-out nop/lnop lines appear to mark empty issue
   slots -- confirm before relying on them.  */
#define reserved1	$75
#define parm		$75
#define tab1		reserved1
#define tab2		reserved1
#define vma		reserved1
#define oldvma		reserved1
#define newmask		reserved1
#define map		reserved1

#define reserved2	$76
#define off1		reserved2
#define off2		reserved2
#define present1	reserved2
#define present2	reserved2
#define sz		reserved2
#define cmp		reserved2
#define add64		reserved2
#define cgbits		reserved2
#define off3		reserved2
#define off4		reserved2
#define addr4		reserved2
#define off5		reserved2
#define tagstat		reserved2

#define reserved3	$77
#define size1		reserved3
#define size2		reserved3
#define rv3		reserved3
#define ealo		reserved3
#define cmd		reserved3
#define off64		reserved3
#define tab3		reserved3
#define tab4		reserved3
#define tab5		reserved3

#define reserved4	$78
#define ovl		reserved4
#define rv2		reserved4
#define rv5		reserved4
#define cgshuf		reserved4
#define newovl		reserved4
#define irqtmp1		reserved4
#define irqtmp2		reserved4

#define reserved5	$79
#define target		reserved5

#define save1		$74
#define rv4		save1
#define rv7		save1
#define tagid		save1
#define maxsize		save1
#define pbyte		save1
#define pbit		save1

#define save2		$73
#define cur		save2
#define rv6		save2
#define osize		save2
#define zovl		save2
#define oldovl		save2
#define newvma		save2

#define save3		$72
#define rv1		save3
#define ea64		save3
#define buf3		save3
#define genwi		save3
#define newmap		save3
#define oldmask		save3

#define save4		$71
#define irq_stat	save4

	.text
	.align	4
/* shufb control used by __ovly_load to build a new $lr: result word 0 is
   word 0 of the first source, result word 1 is word 0 of the second
   source, and result words 2-3 are zero (0x80 control bytes).  */
	.type	__rv_pattern, @object
	.size	__rv_pattern, 16
__rv_pattern:
	.word	0x00010203, 0x10111213, 0x80808080, 0x80808080

/* shufb control used by the DMA loop: result word 0 is word 1 of the
   first source, everything else is zero.  It moves the carry out of the
   low address word into the position of the high address word.  */
	.type	__cg_pattern, @object
	.size	__cg_pattern, 16
__cg_pattern:
	.word	0x04050607, 0x80808080, 0x80808080, 0x80808080

/* One quadword; word 0 holds the index of the overlay most recently
   requested through __ovly_load or __ovly_return.  */
	.type	__ovly_current, @object
	.size	__ovly_current, 16
__ovly_current:
	.space	16

/*
 * __ovly_return - stub for returning from overlay functions.
 *
 * On entry the four slots of $lr are:
 *   __ovly_return, prev ovl index, caller return addr, undefined.
 *
 * Load the previous overlay and jump to the caller return address.
 * Updates __ovly_current.
 *
 * Uses only $75-$79 itself.  save1-save3 (and save4 with OVLY_IRQ_SAVE)
 * are spilled below $sp here so that do_load, which does modify them,
 * can reload them; on the already-present path they are left untouched.
 */
	.align	4
	.global	__ovly_return
	.type	__ovly_return, @function
__ovly_return:
	ila	tab1, _ovly_table - 16				# 0,2	0
	/* ovl word 0 = $lr word 1 (previous overlay index).  */
	shlqbyi	ovl, $lr, 4					# 1,4	0
#nop
	/* target word 0 = $lr word 2 (caller return address).  */
	shlqbyi	target, $lr, 8					# 1,4	1
#nop; lnop
#nop; lnop
	/* Table entries are 16 bytes; with the "- 16" bias on tab1 the
	   overlay index is effectively 1-based.  */
	shli	off1, ovl, 4					# 0,4	4
#lnop
#nop
	hbr	ovly_ret9, target				# 1,15	5
#nop; lnop
#nop; lnop
#nop
	/* vma = whole _ovly_table entry for the previous overlay.  */
	lqx	vma, tab1, off1					# 1,6	8
#ifdef OVLY_IRQ_SAVE
	nop
	stqd	save4, -64($sp)					# 1,6	9
#else
#nop; lnop
#endif
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	/* Bring the .size word of the entry into word 0.  */
	rotqbyi	size1, vma, 4					# 1,4	14
#nop
	stqd	save3, -48($sp)					# 1,6	15
#nop
	stqd	save2, -32($sp)					# 1,6	16
#nop
	stqd	save1, -16($sp)					# 1,6	17
	/* Low bit of .size is the "overlay is present" flag.  */
	andi	present1, size1, 1				# 0,2	18
	stqr	ovl, __ovly_current				# 1,6	18
#nop; lnop
#nop
	/* Not resident: go and DMA it in; do_load ends with bi target.  */
	brz	present1, do_load				# 1,4	20
ovly_ret9:
#nop
	bi	target						# 1,4	21

/*
 * __ovly_load - copy an overlay partition to local store.
 *
 * On entry $75 points to a word consisting of the overlay index in
 * the top 14 bits, and the target address in the bottom 18 bits.
 *
 * Sets up $lr to return via __ovly_return.  If $lr is already set
 * to return via __ovly_return, don't change it.  In that case we
 * have a tail call from one overlay function to another.
 * Updates __ovly_current.
 *
 * The $75 convention above is what the OVL_STUB_SIZE == 8 path decodes.
 * The other path reads "ovl" and "target" without setting them;
 * NOTE(review): presumably the 16-byte call stub has already loaded
 * both registers -- confirm against the stub generator.
 */
	.align  3
	.global __ovly_load
	.type	__ovly_load, @function
__ovly_load:
#if OVL_STUB_SIZE == 8
########
#nop
	/* Fetch the quadword holding the stub word, then rotate that word
	   into word 0 of target.  */
	lqd	target, 0(parm)					# 1,6	-11
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	rotqby	target, target, parm				# 1,4	-5
	ila	tab2, _ovly_table - 16				# 0,2	-4
	stqd	save3, -48($sp)					# 1,6	-4
#nop
	stqd	save2, -32($sp)					# 1,6	-3
#nop
	stqd	save1, -16($sp)					# 1,6	-2
	/* Logical shift right by 18: keep the 14-bit overlay index.  */
	rotmi	ovl, target, -18				# 0,4	-1
	hbr	ovly_load9, target				# 1,15	-1
	ila	rv1, __ovly_return				# 0,2	0
#lnop
#nop; lnop
#nop
	/* Read the outgoing overlay index before overwriting it.  */
	lqr	cur, __ovly_current				# 1,6	2
	shli	off2, ovl, 4					# 0,4	3
	stqr	ovl, __ovly_current				# 1,6	3
	/* rv2 word 0 is all ones when $lr already is __ovly_return.  */
	ceq	rv2, $lr, rv1					# 0,2	4
	lqr	rv3, __rv_pattern				# 1,6	4
#nop; lnop
#nop; lnop
#nop
	lqx	vma, tab2, off2					# 1,6	7
########
#else /* OVL_STUB_SIZE == 16 */
########
	ila	tab2, _ovly_table - 16				# 0,2	0
	stqd	save3, -48($sp)					# 1,6	0
	ila	rv1, __ovly_return				# 0,2	1
	stqd	save2, -32($sp)					# 1,6	1
	shli	off2, ovl, 4					# 0,4	2
	/* Read the outgoing overlay index before overwriting it.  */
	lqr	cur, __ovly_current				# 1,6	2
	nop
	stqr	ovl, __ovly_current				# 1,6	3
	/* rv2 word 0 is all ones when $lr already is __ovly_return.  */
	ceq	rv2, $lr, rv1					# 0,2	4
	lqr	rv3, __rv_pattern				# 1,6	4
#nop
	hbr	ovly_load9, target				# 1,15	5
#nop
	lqx	vma, tab2, off2					# 1,6	6
#nop
	stqd	save1, -16($sp)					# 1,6	7
########
#endif

/* Common tail.  Build the replacement link register
     rv7 = { __ovly_return, outgoing ovl index, old $lr word 0, old $lr word 1 }
   which is the layout __ovly_return expects, then install it only when
   $lr was not already pointing at __ovly_return.  */
#nop; lnop
#nop; lnop
#nop
	/* rv4 = { __ovly_return, cur, 0, 0 } via __rv_pattern.  */
	shufb	rv4, rv1, cur, rv3				# 1,4	10
#nop
	/* Expand the ceq result into a per-byte select mask.  */
	fsmb	rv5, rv2					# 1,4	11
#nop
	/* rv6 = $lr shifted right by 8 bytes: { 0, 0, lr[0], lr[1] }.  */
	rotqmbyi rv6, $lr, -8					# 1,4	12
#nop
	rotqbyi	size2, vma, 4					# 1,4	13
#nop
	/* save3 (rv1) is dead from here on; start restoring.  */
	lqd	save3, -48($sp)					# 1,6	14
#nop; lnop
	or	rv7, rv4, rv6					# 0,2	16
	lqd	save2, -32($sp)					# 1,6	16
	/* Low bit of .size is the "overlay is present" flag.  */
	andi	present2, size2, 1				# 0,2	17
#ifdef OVLY_IRQ_SAVE
	stqd	save4, -64($sp)					# 1,6	17
#else
	lnop							# 1,0	17
#endif
	/* Mask bits set keep the old $lr, clear bits take rv7.  */
	selb	$lr, rv7, $lr, rv5				# 0,2	18
	lqd	save1, -16($sp)					# 1,6	18
#nop
	/* Not resident: fall into the loader, which re-spills nothing and
	   relies on the stack copies written above.  */
	brz	present2, do_load				# 1,4	19
ovly_load9:
#nop
	bi	target						# 1,4	20

/* If we get here, we are about to load a new overlay.
 * "vma" contains the relevant entry from _ovly_table[].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * Also expected on entry: "target" holds the address to branch to when
 * the load is done, __ovly_current word 0 holds the index of the
 * overlay being loaded, and the caller's save1-save3 (plus save4 when
 * OVLY_IRQ_SAVE is defined) have been stored at -16, -32, -48 and -64
 * from $sp.  Those registers are used as scratch below and reloaded
 * from the stack before the final branch.
 */
	.align  3
	.global __ovly_load_event
	.type	__ovly_load_event, @function
__ovly_load_event:
do_load:
#ifdef OVLY_IRQ_SAVE
	/* Record the machine status, then continue at do_load10 through a
	   branch that disables interrupts.  */
	ila	irqtmp1, do_load10				# 0,2	-5
	rotqbyi	sz, vma, 8					# 1,4	-5
#nop
	rdch	irq_stat, $SPU_RdMachStat			# 1,6	-4
#nop
	bid	irqtmp1						# 1,4	-3
do_load10:
	nop
#else
#nop
	/* sz word 0 = .file_offset: the first amount added to the EA.  */
	rotqbyi	sz, vma, 8					# 1,4	0
#endif
	/* osize word 0 = .size = bytes still to transfer.  */
	rotqbyi	osize, vma, 4					# 1,4	1
#nop
	/* _EAR_ quadword: word 0 is written to $MFC_EAH and word 1 to
	   $MFC_EAL below, i.e. a 64-bit effective address, high word
	   first.  */
	lqa	ea64, _EAR_					# 1,6	2
#nop
	lqr	cgshuf, __cg_pattern				# 1,6	3

/* We could predict the branch at the end of this loop by adding a few
   instructions, and there are plenty of free cycles to do so without
   impacting loop execution time.  However, it doesn't make a great
   deal of sense since we need to wait for the dma to complete anyway.

   Each pass advances the 64-bit EA by "sz" (the file offset on the first
   pass, the previous chunk size afterwards), queues one MFC get of
   min (osize, MFC_MAX_DMA_SIZE) bytes to local-store address "vma", then
   advances vma and decrements osize by that chunk size.  */
__ovly_xfer_loop:
#nop
	/* off64 = { 0, sz, ... }: sz aligned with the low EA word.  */
	rotqmbyi off64, sz, -4					# 1,4	4
#nop; lnop
#nop; lnop
#nop; lnop
	/* Per-word carry out of ea64 + off64.  */
	cg	cgbits, ea64, off64				# 0,2	8
#lnop
#nop; lnop
#nop
	/* Move the low word's carry into word 0 so that addx applies it
	   to the high word.  */
	shufb	add64, cgbits, cgbits, cgshuf			# 1,4	10
#nop; lnop
#nop; lnop
#nop; lnop
	addx	add64, ea64, off64				# 0,2	14
#lnop
	ila	maxsize, MFC_MAX_DMA_SIZE			# 0,2	15
	lnop
	/* ea64 = updated address (plain register copy).  */
	ori	ea64, add64, 0					# 0,2	16
	rotqbyi	ealo, add64, 4					# 1,4	16
	cgt	cmp, osize, maxsize				# 0,2	17
	wrch	$MFC_LSA, vma					# 1,6	17
#nop; lnop
	/* sz = osize > maxsize ? maxsize : osize.  */
	selb	sz, osize, maxsize, cmp				# 0,2	19
	wrch	$MFC_EAH, ea64					# 1,6	19
	ila	tagid, MFC_TAG_ID				# 0,2	20
	wrch	$MFC_EAL, ealo					# 1,6	20
	ila	cmd, MFC_GET_CMD				# 0,2	21
	wrch	$MFC_Size, sz					# 1,6	21
	/* osize -= sz.  */
	sf	osize, sz, osize				# 0,2	22
	wrch	$MFC_TagId, tagid				# 1,6	22
	/* Next chunk lands right after this one in local store.  */
	a	vma, vma, sz					# 0,2	23
	wrch	$MFC_Cmd, cmd					# 1,6	23
#nop
	brnz	osize, __ovly_xfer_loop				# 1,4	24

/* Now update our data structures while waiting for DMA to complete.
   Low bit of .size needs to be cleared on the _ovly_table entry
   corresponding to the evicted overlay, and set on the entry for the
   newly loaded overlay.  Note that no overlay may in fact be evicted
   as _ovly_buf_table[] starts with all zeros.  Don't zap .size entry
   for zero index!  Also of course update the _ovly_buf_table entry.  */
#nop
	lqr	newovl, __ovly_current				# 1,6	25
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
	shli	off3, newovl, 4					# 0,4	31
#lnop
	ila	tab3, _ovly_table - 16				# 0,2	32
#lnop
#nop
	/* pbyte: only byte 7 (low byte of word 1) is 0xff.  */
	fsmbi	pbyte, 0x100					# 1,4	33
#nop; lnop
#nop
	/* Reload the table entry; vma was advanced by the DMA loop.  */
	lqx	vma, tab3, off3					# 1,6	35
#nop; lnop
	/* pbit = { 0, 1, 0, 0 }: the present flag in the .size word.  */
	andi	pbit, pbyte, 1					# 0,2	37
	lnop
#nop; lnop
#nop; lnop
#nop; lnop
	or	newvma, vma, pbit				# 0,2	41
	/* buf3 word 0 = .buf, the buffer number of the new overlay.  */
	rotqbyi	buf3, vma, 12					# 1,4	41
#nop; lnop
#nop
	stqx	newvma, tab3, off3				# 1,6	43
#nop; lnop
	/* _ovly_buf_table has 4-byte entries; the "- 4" bias on tab4
	   makes the buffer number effectively 1-based.  */
	shli	off4, buf3, 2					# 1,4	45
#lnop
	ila	tab4, _ovly_buf_table - 4			# 0,2	46
#lnop
#nop; lnop
#nop; lnop
#nop
	/* map = quadword containing the buffer's table word.  */
	lqx	map, tab4, off4					# 1,6	49
#nop
	/* genwi = shufb control that inserts a word at that address.  */
	cwx	genwi, tab4, off4				# 1,4	50
	a	addr4, tab4, off4				# 0,2	51
#lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	/* oldovl word 0 = overlay previously recorded for this buffer.  */
	rotqby	oldovl, map, addr4				# 1,4	55
#nop
	/* newmap = map with this buffer's word replaced by newovl.  */
	shufb	newmap, newovl, map, genwi			# 0,4	56
#if MFC_TAG_ID < 16
	ila	newmask, 1 << MFC_TAG_ID			# 0,2	57
#else
	ilhu	newmask, 1 << (MFC_TAG_ID - 16)			# 0,2	57
#endif
#lnop
#nop; lnop
#nop; lnop
	stqd	newmap, 0(addr4)				# 1,6	60

/* Save app's tagmask, wait for DMA complete, restore mask.  */
	ila	tagstat, MFC_TAG_UPDATE_ALL			# 0,2	61
	rdch	oldmask, $MFC_RdTagMask				# 1,6	61
#nop
	wrch	$MFC_WrTagMask, newmask				# 1,6	62
#nop
	wrch	$MFC_WrTagUpdate, tagstat			# 1,6	63
#nop
	/* The value read is not used; the read is what waits.  */
	rdch	tagstat, $MFC_RdTagStat				# 1,6	64
#nop
	sync							# 1,4	65
/* Any hint prior to the sync is lost.  A hint here allows the branch
   to complete 15 cycles after the hint.  With no hint the branch will
   take 18 or 19 cycles.  */
	ila	tab5, _ovly_table - 16				# 0,2	66
	hbr	do_load99, target				# 1,15	66
	shli	off5, oldovl, 4					# 0,4	67
	wrch	$MFC_WrTagMask, oldmask				# 1,6	67
	/* zovl becomes all ones in every word when oldovl == 0, else 0.  */
	ceqi	zovl, oldovl, 0					# 0,2	68
#lnop
#nop; lnop
#nop
	fsm	zovl, zovl					# 1,4	70
#nop
	lqx	oldvma, tab5, off5				# 1,6	71
#nop
	lqd	save3, -48($sp)					# 1,6	72
#nop; lnop
	/* Nothing was evicted when oldovl == 0: drop the bit to clear so
	   the store below writes back what was loaded.  */
	andc	pbit, pbit, zovl				# 0,2	74
	lqd	save2, -32($sp)					# 1,6	74
#ifdef OVLY_IRQ_SAVE
	ila	irqtmp2, do_load90				# 0,2	75
#lnop
	/* Keep only bit 0 of the machine status read on entry.  */
	andi	irq_stat, irq_stat, 1				# 0,2	76
#lnop
#else
#nop; lnop
#nop; lnop
#endif
	/* Clear the present flag of the evicted overlay.  */
	andc	oldvma, oldvma, pbit				# 0,2	77
	/* pbit lives in save1, so save1 is restored only after its last
	   use just above.  */
	lqd	save1, -16($sp)					# 1,6	77
	nop							# 0,0	78
#lnop
#nop
	stqx	oldvma, tab5, off5				# 1,6	79
#nop
#ifdef OVLY_IRQ_SAVE
	/* Re-enable interrupts on the way to do_load90 if that status bit
	   was set; otherwise fall through with them still disabled.  */
	binze	irq_stat, irqtmp2				# 1,4	80
do_load90:
#nop
	lqd	save4, -64($sp)					# 1,6	84
#else
#nop; lnop
#endif

/* Global label on a nop that executes after the DMA has completed and
   the tables are updated, immediately before the branch to target.
   NOTE(review): presumably a breakpoint hook for debuggers -- confirm.  */
	.global	_ovly_debug_event
	.type	_ovly_debug_event, @function
_ovly_debug_event:
	nop
/* Branch to target address. */
do_load99:
	bi	target						# 1,4	81/85

	.size	__ovly_load, . - __ovly_load
