// IA-64 assembler input: one section per resource, named in the section's heading comment.
// Detect WAW violations.  Cases taken from DV tables.
// Each group ends at a ";;" stop; pairs tagged "no DV here" are written as non-conflicting. NOTE(review): statements sit on their original line numbers -- confirm whether expected output keys on them before inserting lines.
        .text
        .explicit                                       // NOTE(review): presumably selects explicit-stop mode so the ";;" below delimit the groups -- confirm
// AR[BSP]
        mov             ar.bsp = r0
        mov             ar.bsp = r1
        ;;
// AR[BSPSTORE]
        mov             ar.bspstore = r2
        mov             ar.bspstore = r3
        ;;

// AR[CCV]
        mov             ar.ccv = r4
        mov             ar.ccv = r4
        ;;

// AR[EC]: br.wtop paired with an explicit write of ar.ec
        br.wtop.sptk    L
        mov             ar.ec = r0
        ;;

// AR[FPSR].sf0.controls: whole-register write paired with fsetc.s0
        mov             ar.fpsr = r0
        fsetc.s0        0x7f, 0x0f
        ;;

// AR[FPSR].sf1.controls: whole-register write paired with fsetc.s1
        mov             ar.fpsr = r0
        fsetc.s1        0x7f, 0x0f
        ;;

// AR[FPSR].sf2.controls: whole-register write paired with fsetc.s2
        mov             ar.fpsr = r0
        fsetc.s2        0x7f, 0x0f
        ;;

// AR[FPSR].sf3.controls: whole-register write paired with fsetc.s3
        mov             ar.fpsr = r0
        fsetc.s3        0x7f, 0x0f
        ;;

// AR[FPSR].sf0.flags: fcmp/fcmp first, then fcmp/fclrf
        fcmp.eq.s0      p1, p2 = f3, f4
        fcmp.eq.s0      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s0      p1, p2 = f3, f4
        fclrf.s0
        ;;

// AR[FPSR].sf1.flags: fcmp/fcmp first, then fcmp/fclrf
        fcmp.eq.s1      p1, p2 = f3, f4
        fcmp.eq.s1      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s1      p1, p2 = f3, f4
        fclrf.s1
        ;;

// AR[FPSR].sf2.flags: fcmp/fcmp first, then fcmp/fclrf
        fcmp.eq.s2      p1, p2 = f3, f4
        fcmp.eq.s2      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s2      p1, p2 = f3, f4
        fclrf.s2
        ;;

// AR[FPSR].sf3.flags: fcmp/fcmp first, then fcmp/fclrf
        fcmp.eq.s3      p1, p2 = f3, f4
        fcmp.eq.s3      p3, p4 = f3, f4         // no DV here
        ;;
        fcmp.eq.s3      p1, p2 = f3, f4
        fclrf.s3
        ;;

// AR[FPSR].traps/rv plus all controls/flags
        mov             ar.fpsr = r0
        mov             ar.fpsr = r0
        ;;

// AR[ITC]
        mov             ar.itc = r1
        mov             ar.itc = r1
        ;;

// AR[RUC]
        mov             ar.ruc = r1
        mov             ar.ruc = r1
        ;;

// AR[K]
        mov             ar.k2 = r3
        mov             ar.k2 = r3
        ;;

// AR[LC]: br.cloop paired with an explicit write of ar.lc
        br.cloop.sptk   L
        mov             ar.lc = r0
        ;;

// AR[PFS]: explicit write of ar.pfs paired with br.call
        mov             ar.pfs = r0
        br.call.sptk    b0 = L
        ;;

// AR[RNAT] (see also AR[BSPSTORE])
        mov             ar.rnat = r8
        mov             ar.rnat = r8
        ;;

// AR[RSC]
        mov             ar.rsc = r10
        mov             ar.rsc = r10
        ;;

// AR[UNAT]: explicit write of ar.unat paired with st8.spill
        mov             ar.unat = r12
        st8.spill       [r0] = r1
        ;;

// AR%
        mov             ar48 = r0
        mov             ar48 = r0
        ;;

// BR%
        mov             b1 = r0
        mov             b1 = r1
        ;;

// CFM (and others)
        br.wtop.sptk    L
        br.wtop.sptk    L
        ;;

// CR[CMCV]
        mov             cr.cmcv = r1
        mov             cr.cmcv = r2
        ;;

// CR[DCR]
        mov             cr.dcr = r3
        mov             cr.dcr = r3
        ;;

// CR[EOI] (and InService)
        mov             cr.eoi = r0
        mov             cr.eoi = r0
        ;;
        srlz.d

// CR[GPTA]
        mov             cr.gpta = r6
        mov             cr.gpta = r7
        ;;

// CR[IFA]
        mov             cr.ifa = r9
        mov             cr.ifa = r10
        ;;

// CR[IFS]: explicit write of cr.ifs paired with cover
        mov             cr.ifs = r11
        cover
        ;;

// CR[IHA]
        mov             cr.iha = r13
        mov             cr.iha = r14
        ;;

// CR[IIB%]
        mov             cr.iib0 = r15
        mov             cr.iib0 = r16
        ;;

        mov             cr.iib1 = r15           // iib1: same pattern as the iib0 pair
        mov             cr.iib1 = r16
        ;;

// CR[IIM]
        mov             cr.iim = r15
        mov             cr.iim = r16
        ;;

// CR[IIP]
        mov             cr.iip = r17
        mov             cr.iip = r17
        ;;

// CR[IIPA]
        mov             cr.iipa = r19
        mov             cr.iipa = r20
        ;;

// CR[IPSR]
        mov             cr.ipsr = r21
        mov             cr.ipsr = r22
        ;;

// CR[IRR%] (and others): two reads of cr.ivr
        mov             r2 = cr.ivr
        mov             r3 = cr.ivr
        ;;

// CR[ISR]
        mov             cr.isr = r24
        mov             cr.isr = r25
        ;;

// CR[ITIR]
        mov             cr.itir = r26
        mov             cr.itir = r27
        ;;

// CR[ITM]
        mov             cr.itm = r28
        mov             cr.itm = r29
        ;;

// CR[ITV]
        mov             cr.itv = r0
        mov             cr.itv = r1
        ;;

// CR[IVA]
        mov             cr.iva = r0
        mov             cr.iva = r1
        ;;

// CR[IVR] (no explicit writers)

// CR[LID]
        mov             cr.lid = r0
        mov             cr.lid = r1
        ;;

// CR[LRR%]: different registers (lrr0/lrr1) first, then the same register twice
        mov             cr.lrr0 = r0
        mov             cr.lrr1 = r0            // no DV here
        ;;
        mov             cr.lrr0 = r0
        mov             cr.lrr0 = r0
        ;;

// CR[PMV]
        mov             cr.pmv = r0
        mov             cr.pmv = r1
        ;;

// CR[PTA]
        mov             cr.pta = r0
        mov             cr.pta = r1
        ;;

// CR[TPR]
        mov             cr.tpr = r0
        mov             cr.tpr = r1
        ;;

// DBR#
        mov             dbr[r1] = r1
        mov             dbr[r1] = r2
        ;;
        srlz.d

// DTC
        ptc.e           r0
        ptc.e           r1                      // no DVs here
        ;;
        ptc.e           r0                      // (and others)
        itc.i           r0
        ;;
        srlz.d

// DTC_LIMIT
        ptc.g           r0, r1                  // NOTE: GAS automatically emits stops after
        ptc.ga          r2, r3                  // ptc.g/ptc.ga, so this conflict is no
        ;;                                      // longer possible in GAS-generated assembly
        srlz.d

// DTR
        itr.d           dtr[r0] = r1            // (and others)
        ptr.d           r2, r3
        ;;
        srlz.d

// FR%: mov paired with ldfs.c.clr, both targeting f3
        mov             f3 = f2
        ldfs.c.clr      f3 = [r1]
        ;;

// GR%: mov paired with ld8.c.clr, both targeting r2
        mov             r2 = r0
        ld8.c.clr       r2 = [r1]
        ;;

// IBR#
        mov             ibr[r0] = r2
        mov             ibr[r1] = r2
        ;;

// InService: write of cr.eoi paired with a read of cr.ivr
        mov             cr.eoi = r0
        mov             r1 = cr.ivr
        ;;
        srlz.d

// ITC
        ptc.e           r0
        itc.i           r1
        ;;
        srlz.i
        ;;

// ITR
        itr.i           itr[r0] = r1
        ptr.i           r2, r3
        ;;
        srlz.i
        ;;

// PKR# (NOTE(review): .reg.val presumably gives the assembler known index values so pkr[r1]/pkr[r2] can be told apart -- confirm)
        .reg.val        r1, 0x1
        .reg.val        r2, ~0x1
        mov             pkr[r1] = r1
        mov             pkr[r2] = r1            // no DV here
        ;;
        mov             pkr[r1] = r1
        mov             pkr[r1] = r1
        ;;

// PMC#
        mov             pmc[r3] = r1
        mov             pmc[r4] = r1
        ;;

// PMD#
        mov             pmd[r3] = r1
        mov             pmd[r4] = r1
        ;;

// PR%, 1 - 15
        cmp.eq          p1, p0 = r0, r1
        cmp.eq          p1, p0 = r2, r3
        ;;
        fcmp.eq         p1, p2 = f2, f3
        fcmp.eq         p1, p3 = f2, f3
        ;;
        cmp.eq.and      p1, p2 = r0, r1
        cmp.eq.or       p1, p3 = r2, r3
        ;;
        cmp.eq.or       p1, p3 = r2, r3
        cmp.eq.and      p1, p2 = r0, r1
        ;;
        cmp.eq.and      p1, p2 = r0, r1
        cmp.eq.and      p1, p3 = r2, r3         // no DV here
        ;;
        cmp.eq.or       p1, p2 = r0, r1
        cmp.eq.or       p1, p3 = r2, r3         // no DV here
        ;;

// PR63
        br.wtop.sptk    L
        br.wtop.sptk    L
        ;;
        cmp.eq          p63, p0 = r0, r1
        cmp.eq          p63, p0 = r2, r3
        ;;
        fcmp.eq         p63, p2 = f2, f3
        fcmp.eq         p63, p3 = f2, f3
        ;;
        cmp.eq.and      p63, p2 = r0, r1
        cmp.eq.or       p63, p3 = r2, r3
        ;;
        cmp.eq.or       p63, p3 = r2, r3
        cmp.eq.and      p63, p2 = r0, r1
        ;;
        cmp.eq.and      p63, p2 = r0, r1
        cmp.eq.and      p63, p3 = r2, r3        // no DV here
        ;;
        cmp.eq.or       p63, p2 = r0, r1
        cmp.eq.or       p63, p3 = r2, r3        // no DV here
        ;;

// PSR.ac
        rum             (1<<3)
        rum             (1<<3)
        ;;

// PSR.be
        rum             (1<<1)
        rum             (1<<1)
        ;;

// PSR.bn
        bsw.0                                   // GAS automatically emits a stop after bsw.n
        bsw.0                                   // so this conflict is avoided
        ;;

// PSR.cpl: epc paired with br.ret
        epc
        br.ret.sptk     b0
        ;;

// PSR.da (rfi is the only writer)
// PSR.db (and others)
        mov             psr.l = r0
        mov             psr.l = r1
        ;;
        srlz.d

// PSR.dd (rfi is the only writer)

// PSR.dfh
        ssm             (1<<19)
        ssm             (1<<19)
        ;;
        srlz.d

// PSR.dfl
        ssm             (1<<18)
        ssm             (1<<18)
        ;;
        srlz.d

// PSR.di
        rsm             (1<<22)
        rsm             (1<<22)
        ;;

// PSR.dt
        rsm             (1<<17)
        rsm             (1<<17)
        ;;

// PSR.ed (rfi is the only writer)
// PSR.i
        ssm             (1<<14)
        ssm             (1<<14)
        ;;

// PSR.ia (no DV semantics)
// PSR.ic
        ssm             (1<<13)
        ssm             (1<<13)
        ;;

// PSR.id (rfi is the only writer)
// PSR.is (br.ia and rfi are the only writers)
// PSR.it (rfi is the only writer)
// PSR.lp (see PSR.db)

// PSR.mc (rfi is the only writer)
// PSR.mfh
        mov             f32 = f33
        mov             r10 = psr
        ;;
        ssm             (1<<5)
        ssm             (1<<5)
        ;;
        ssm             (1<<5)
        mov             psr.um = r10
        ;;
        rum             (1<<5)
        rum             (1<<5)
        ;;
        mov             f32 = f33
        mov             f34 = f35               // no DV here
        ;;

// PSR.mfl
        mov             f2 = f3
        mov             r10 = psr
        ;;
        ssm             (1<<4)
        ssm             (1<<4)
        ;;
        ssm             (1<<4)
        mov             psr.um = r10
        ;;
        rum             (1<<4)
        rum             (1<<4)
        ;;
        mov             f2 = f3
        mov             f4 = f5                 // no DV here
        ;;

// PSR.pk
        rsm             (1<<15)
        rsm             (1<<15)
        ;;

// PSR.pp
        rsm             (1<<21)
        rsm             (1<<21)
        ;;

// PSR.ri (no DV semantics)
// PSR.rt (see PSR.db)

// PSR.si: rsm paired with ssm on the same bit
        rsm             (1<<23)
        ssm             (1<<23)
        ;;

// PSR.sp: ssm paired with rsm on the same bit
        ssm             (1<<20)
        rsm             (1<<20)
        ;;
        srlz.d

// PSR.ss (rfi is the only writer)
// PSR.tb (see PSR.db)

// PSR.up
        rsm             (1<<2)
        rsm             (1<<2)
        ;;
        rum             (1<<2)
        mov             psr.um = r0
        ;;

// RR#
        mov             rr[r2] = r1
        mov             rr[r2] = r3
        ;;

// PR, additional cases (or.andcm and and.orcm interaction)
        cmp.eq.or.andcm p6, p7 = 1, r32
        cmp.eq.or.andcm p6, p7 = 5, r36         // no DV here
        ;;
        cmp.eq.and.orcm p6, p7 = 1, r32
        cmp.eq.and.orcm p6, p7 = 5, r36         // no DV here
        ;;
        cmp.eq.or.andcm p63, p7 = 1, r32
        cmp.eq.or.andcm p63, p7 = 5, r36        // no DV here
        ;;
        cmp.eq.or.andcm p6, p63 = 1, r32
        cmp.eq.or.andcm p6, p63 = 5, r36        // no DV here
        ;;
        cmp.eq.and.orcm p63, p7 = 1, r32
        cmp.eq.and.orcm p63, p7 = 5, r36        // no DV here
        ;;
        cmp.eq.and.orcm p6, p63 = 1, r32
        cmp.eq.and.orcm p6, p63 = 5, r36        // no DV here
        ;;
        cmp.eq.or.andcm p6, p7 = 1, r32
        cmp.eq.and.orcm p6, p7 = 5, r36
        ;;
        cmp.eq.or.andcm p63, p7 = 1, r32
        cmp.eq.and.orcm p63, p7 = 5, r36
        ;;
        cmp.eq.or.andcm p6, p63 = 1, r32
        cmp.eq.and.orcm p6, p63 = 5, r36
        ;;

// PR%, 16 - 62
        cmp.eq          p21, p0 = r0, r1
        cmp.eq          p21, p0 = r2, r3
        ;;
        fcmp.eq         p21, p22 = f2, f3
        fcmp.eq         p21, p23 = f2, f3
        ;;
        cmp.eq.and      p21, p22 = r0, r1
        cmp.eq.or       p21, p23 = r2, r3
        ;;
        cmp.eq.or       p21, p23 = r2, r3
        cmp.eq.and      p21, p22 = r0, r1
        ;;
        cmp.eq.and      p21, p22 = r0, r1
        cmp.eq.and      p21, p23 = r2, r3       // no DV here
        ;;
        cmp.eq.or       p21, p22 = r0, r1
        cmp.eq.or       p21, p23 = r2, r3       // no DV here
        ;;

// RSE (no test cases in this section)

L:                                              // branch target referenced by the br.* cases above