# Assembler test input: one small test case per label.
#
# Layout of a test case:
#	.text
#	.global NAME
# NAME:
#	instruction(s) under test
#
# "insn1 || insn2" is the explicitly parallel form.  Most parallel cases then
# repeat the same instructions on separate lines so that the implicitly
# parallel (assembler-paired) form is covered as well.

# Test new instructions
branchpoint:

	.text
	.global bcl
bcl:
	bcl branchpoint

	.text
	.global bncl
bncl:
	bncl branchpoint

	.text
	.global cmpz
cmpz:
	cmpz fp

	.text
	.global cmpeq
cmpeq:
	cmpeq fp, fp

	.text
	.global maclh1
maclh1:
	maclh1 fp, fp

	.text
# The directive previously named "macsl0", a symbol that is never defined;
# it now exports the label that actually follows.
	.global msblo
msblo:
	msblo fp, fp

	.text
	.global mulwu1
mulwu1:
	mulwu1 fp, fp

	.text
	.global macwu1
macwu1:
	macwu1 fp, fp

	.text
	.global sadd
sadd:
	sadd

	.text
	.global satb
satb:
	satb fp, fp


	.text
	.global mulhi
mulhi:
	mulhi fp, fp, a1

	.text
	.global mullo
mullo:
	mullo fp, fp, a0

	.text
	.global divh
divh:
	divh fp, fp

	.text
	.global machi
machi:
	machi fp, fp, a1

	.text
	.global maclo
maclo:
	maclo fp, fp, a0

	.text
	.global mvfachi
mvfachi:
	mvfachi fp, a1

	.text
	.global mvfacmi
mvfacmi:
	mvfacmi fp, a1

	.text
	.global mvfaclo
mvfaclo:
	mvfaclo fp, a1

	.text
	.global mvtachi
mvtachi:
	mvtachi fp, a1

	.text
	.global mvtaclo
mvtaclo:
	mvtaclo fp, a0

	.text
	.global rac
rac:
	rac a1

	.text
	.global rac_ds
rac_ds:
	rac a1, a0

	.text
	.global rac_dsi
rac_dsi:
	rac a0, a1, #1

	.text
	.global rach
rach:
	rach a1

	.text
	.global rach_ds
rach_ds:
	rach a0, a1

	.text
	.global rach_dsi
rach_dsi:
	rach a1, a0, #2

# Test explicitly parallel and implicitly parallel instructions
# Including apparent instruction sequence reordering.
	.text
	.global bc__add
bc__add:
	bc bcl || add fp, fp
# Use bc.s here as bc is relaxable and thus a nop will be emitted.
	bc.s bcl
	add fp, fp

	.text
	.global bcl__addi
bcl__addi:
	bcl bcl || addi fp, #77
	addi fp, #77
# Use bcl.s here as bcl is relaxable and thus the parallelization won't happen.
	bcl.s bcl

	.text
	.global bl__addv
bl__addv:
	bl bcl || addv fp, fp
	addv fp, fp
# Use bl.s here as bl is relaxable and thus the parallelization won't happen.
	bl.s bcl

	.text
	.global bnc__addx
bnc__addx:
	bnc bcl || addx fp, fp
# Use bnc.s here as bnc is relaxable and thus the parallelization attempt won't
# happen.  Things still won't be parallelized, but we want this test to try.
	bnc.s bcl
	addx fp, fp

	.text
	.global bncl__and
bncl__and:
	bncl bcl || and fp, fp
	and fp, fp
	bncl.s bcl

	.text
	.global bra__cmp
bra__cmp:
	bra bcl || cmp fp, fp
	cmp fp, fp
# Use bra.s here as bra is relaxable and thus the parallelization won't happen.
	bra.s bcl

	.text
	.global jl__cmpeq
jl__cmpeq:
	jl fp || cmpeq fp, fp
	cmpeq fp, fp
	jl fp

	.text
	.global jmp__cmpu
jmp__cmpu:
	jmp fp || cmpu fp, fp
	cmpu fp, fp
	jmp fp

	.text
	.global ld__cmpz
ld__cmpz:
	ld fp, @fp || cmpz r1
	cmpz r1
	ld fp, @fp

	.text
	.global ld__ldi
ld__ldi:
	ld fp, @r1+ || ldi r2, #77
	ld fp, @r1+
	ldi r2, #77

	.text
	.global ldb__mv
ldb__mv:
	ldb fp, @fp || mv r2, fp
	ldb fp, @fp
	mv r2, fp

	.text
	.global ldh__neg
ldh__neg:
	ldh fp, @fp || neg r2, fp
	ldh fp, @fp
	neg r2, fp

	.text
	.global ldub__nop
ldub__nop:
	ldub fp, @fp || nop
	ldub fp, @fp
	nop

	.text
	.global lduh__not
lduh__not:
	lduh fp, @fp || not r2, fp
	lduh fp, @fp
	not r2, fp

	.text
	.global lock__or
lock__or:
	lock fp, @fp || or r2, fp
	lock fp, @fp
	or r2, fp

	.text
	.global mvfc__sub
mvfc__sub:
	mvfc fp, cr1 || sub r2, fp
	mvfc fp, cr1
	sub r2, fp

	.text
	.global mvtc__subv
mvtc__subv:
	mvtc fp, cr2 || subv r2, fp
	mvtc fp, cr2
	subv r2, fp

	.text
	.global rte__subx
rte__subx:
	rte || sub r2, fp
	rte
	subx r2, fp

	.text
	.global sll__xor
sll__xor:
	sll fp, r1 || xor r2, fp
	sll fp, r1
	xor r2, fp

	.text
	.global slli__machi
slli__machi:
	slli fp, #22 || machi r2, fp
	slli fp, #22
	machi r2, fp

	.text
	.global sra__maclh1
sra__maclh1:
	sra fp, fp || maclh1 r2, fp
	sra fp, fp
	maclh1 r2, fp

	.text
	.global srai__maclo
srai__maclo:
	srai fp, #22 || maclo r2, fp
	srai fp, #22
	maclo r2, fp

	.text
	.global srl__macwhi
srl__macwhi:
	srl fp, fp || macwhi r2, fp
	srl fp, fp
	macwhi r2, fp

	.text
	.global srli__macwlo
srli__macwlo:
	srli fp, #22 || macwlo r2, fp
	srli fp, #22
	macwlo r2, fp

	.text
	.global st__macwu1
st__macwu1:
	st fp, @fp || macwu1 r2, fp
	st fp, @fp
	macwu1 r2, fp

	.text
	.global st__msblo
st__msblo:
	st fp, @+fp || msblo r2, fp
	st fp, @+fp
	msblo r2, fp

	.text
	.global st__mul
st__mul:
	st fp, @-fp || mul r2, fp
	st fp, @-fp
	mul r2, fp

	.text
	.global stb__mulhi
stb__mulhi:
	stb fp, @fp || mulhi r2, fp
	stb fp, @fp
	mulhi r2, fp

	.text
	.global sth__mullo
sth__mullo:
	sth fp, @fp || mullo r2, fp
	sth fp, @fp
	mullo r2, fp

	.text
	.global trap__mulwhi
trap__mulwhi:
	trap #2 || mulwhi r2, fp
	trap #2
	mulwhi r2, fp

	.text
	.global unlock__mulwlo
unlock__mulwlo:
	unlock fp, @fp || mulwlo r2, fp
	unlock fp, @fp
	mulwlo r2, fp

	.text
	.global add__mulwu1
add__mulwu1:
	add fp, fp || mulwu1 r2, fp
	add fp, fp
	mulwu1 r2, fp

	.text
	.global addi__mvfachi
addi__mvfachi:
	addi fp, #77 || mvfachi r2, a0
	addi fp, #77
	mvfachi r2, a0

	.text
	.global addv__mvfaclo
addv__mvfaclo:
	addv fp, fp || mvfaclo r2, a1
	addv fp, fp
	mvfaclo r2, a1

	.text
	.global addx__mvfacmi
addx__mvfacmi:
	addx fp, fp || mvfacmi r2, a0
	addx fp, fp
	mvfacmi r2, a0

	.text
	.global and__mvtachi
and__mvtachi:
	and fp, fp || mvtachi r2, a0
	and fp, fp
	mvtachi r2, a0

	.text
	.global cmp__mvtaclo
cmp__mvtaclo:
	cmp fp, fp || mvtaclo r2, a0
	cmp fp, fp
	mvtaclo r2, a0

	.text
	.global cmpeq__rac
cmpeq__rac:
	cmpeq fp, fp || rac a1
	cmpeq fp, fp
	rac a1

	.text
	.global cmpu__rach
cmpu__rach:
	cmpu fp, fp || rach a0, a1
	cmpu fp, fp
	rach a1, a1, #1

	.text
	.global cmpz__sadd
cmpz__sadd:
	cmpz fp || sadd
	cmpz fp
	sadd



# Test private instructions
	.text
	.global sc
sc:
	sc
	sadd

	.text
	.global snc
snc:
	snc
	sadd

	.text
	.global jc
jc:
	jc fp

	.text
	.global jnc
jnc:
	jnc fp

	.text
	.global pcmpbz
pcmpbz:
	pcmpbz fp

	.text
	.global sat
sat:
	sat fp, fp

	.text
	.global sath
sath:
	sath fp, fp


# Test parallel versions of the private instructions

	.text
	.global jc__pcmpbz
jc__pcmpbz:
	jc fp || pcmpbz fp
	jc fp
	pcmpbz fp

	.text
	.global jnc__ldi
jnc__ldi:
	jnc fp || ldi fp, #77
	jnc fp
	ldi fp, #77

	.text
	.global sc__mv
sc__mv:
	sc || mv fp, r2
	sc
	mv fp, r2

	.text
	.global snc__neg
snc__neg:
	snc || neg fp, r2
	snc
	neg fp, r2

# Test automatic and explicit parallelisation of instructions
	.text
	.global nop__sadd
nop__sadd:
	nop
	sadd

	.text
	.global sadd__nop
sadd__nop:
	sadd
	nop

	.text
	.global sadd__nop_reverse
sadd__nop_reverse:
	sadd || nop

	.text
	.global add__not
add__not:
	add r0, r1
	not r3, r5

	.text
# The directive previously named "add__not__dest_clash" (double underscore),
# which did not match the label below, so the label was never exported.  The
# label spelling is kept as-is so the defined symbol name does not change.
	.global add__not_dest_clash
add__not_dest_clash:
	add r3, r4
	not r3, r5

	.text
	.global add__not__src_clash
add__not__src_clash:
	add r3, r4
	not r5, r3

	.text
	.global add__not__no_clash
add__not__no_clash:
	add r3, r4
	not r4, r5

	.text
	.global mul__sra
mul__sra:
	mul r1, r2
	sra r3, r4

	.text
	.global mul__sra__reverse_src_clash
mul__sra__reverse_src_clash:
	mul r1, r3
	sra r3, r4

	.text
	.global bc__add_
bc__add_:
	bc.s label
	add r1, r2

	.text
	.global add__bc
add__bc:
	add r3, r4
	bc.s label

	.text
	.global bc__add__forced_parallel
bc__add__forced_parallel:
	bc label || add r5, r6

	.text
	.global add__bc__forced_parallel
add__bc__forced_parallel:
	add r7, r8 || bc label
label:
	nop

; Additional testcases.
; These insns were added to the chip later.

	.text
mulwhi:
	mulwhi fp, fp, a0
	mulwhi fp, fp, a1

mulwlo:
	mulwlo fp, fp, a0
	mulwlo fp, fp, a1

macwhi:
	macwhi fp, fp, a0
	macwhi fp, fp, a1

macwlo:
	macwlo fp, fp, a0
	macwlo fp, fp, a1