/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.
   Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: it assumes that PutIs do not
   write parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on the x86 guest that has not mattered,
   since only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date
   at memory exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   TODO: CSE up F64 literals (already doing F64is).

   TODO: CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive. */
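
/* A sketch (not compiled) of how the "cheap transformations" leg
   described above hangs together, in terms of the pass functions
   defined below.  The real sequencing, including the specialisation
   step, the second dead-code pass and the level checks, lives in the
   driver at the end of this file; this fragment is only to orient
   the reader. */
#if 0
static IRSB* cheap_transformations_sketch (
                IRSB* bb,
                Bool (*preciseMemExnsFn)(Int,Int) )
{
   bb = flatten_BB(bb);                             /* atomic SSA form */
   redundant_get_removal_BB(bb);                    /* in place */
   redundant_put_removal_BB(bb, preciseMemExnsFn);  /* in place */
   bb = cprop_BB(bb);                               /* builds a new IRSB */
   do_deadcode_BB(bb);                              /* in place */
   return bb;
}
#endif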


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for
   key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used  = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
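
/* Illustrative (not compiled) use of the map.  Deleted or never-used
   slots are simply marked !inuse, and growth keeps only the live
   entries. */
#if 0
static void exampleHHW ( void )
{
   HWord   v;
   HashHW* h = newHHW();
   addToHHW(h, 0x10, 0x111);
   addToHHW(h, 0x20, 0x222);
   addToHHW(h, 0x10, 0x333);           /* replaces the first binding */
   vassert(lookupHHW(h, &v, 0x10) && v == 0x333);
   vassert(!lookupHHW(h, NULL, 0x30)); /* absent key */
}
#endif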


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(ex->Iex.Qop.op,
                       flatten_Expr(bb, ex->Iex.Qop.arg1),
                       flatten_Expr(bb, ex->Iex.Qop.arg2),
                       flatten_Expr(bb, ex->Iex.Qop.arg3),
                       flatten_Expr(bb, ex->Iex.Qop.arg4))));
         return IRExpr_RdTmp(t1);

      case Iex_Triop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(ex->Iex.Triop.op,
                         flatten_Expr(bb, ex->Iex.Triop.arg1),
                         flatten_Expr(bb, ex->Iex.Triop.arg2),
                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
         return IRExpr_RdTmp(t1);

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
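
/* Illustrative (not compiled) example of flattening, with invented
   guest offsets.  Given the nested expression
   Add32(GET:I32(16), Shl32(GET:I32(24), 0x2:I8)), flatten_Expr
   appends one WrTmp per non-atomic node, e.g.

      t1 = GET:I32(16)
      t3 = GET:I32(24)
      t2 = Shl32(t3,0x2)
      t0 = Add32(t1,t2)

   (exact temp numbering may vary) and returns RdTmp(t0). */
#if 0
static void example_flatten ( void )
{
   IRSB* bb = emptyIRSB();
   IRExpr* nested
      = IRExpr_Binop(Iop_Add32,
                     IRExpr_Get(16, Ity_I32),
                     IRExpr_Binop(Iop_Shl32,
                                  IRExpr_Get(24, Ity_I32),
                                  IRExpr_Const(IRConst_U8(2))));
   IRExpr* atom = flatten_Expr(bb, nested);
   vassert(atom->tag == Iex_RdTmp);
}
#endif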


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         e1 = flatten_Expr(bb, st->Ist.PutI.ix);
         e2 = flatten_Expr(bb, st->Ist.PutI.data);
         addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
                                       e1,
                                       st->Ist.PutI.bias,
                                       e2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1, e2));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
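
/* Worked example of the key encoding (guest offsets invented): a
   32-bit access at offset 16 covers bytes 16..19, so its key is
   (16 << 16) | 19 == 0x00100013.  A 64-bit access at offset 12
   covers bytes 12..19 and hence overlaps it. */
#if 0
static void example_keys ( void )
{
   vassert(mk_key_GetPut(16, Ity_I32) == 0x00100013);
   vassert(mk_key_GetPut(12, Ity_I64) == 0x000C0013);
}
#endif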

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi). */

static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
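
/* Illustrative (not compiled) sketch: an entry covering bytes 16..19
   is killed by invalidating the overlapping range 12..19, since
   neither 19 < 12 nor 19 < 16 holds. */
#if 0
static void example_invalidate ( void )
{
   HashHW* env = newHHW();
   addToHHW(env, (HWord)mk_key_GetPut(16, Ity_I32), 0);
   invalidateOverlaps(env, 12, 19);
   vassert(!lookupHHW(env, NULL, (HWord)mk_key_GetPut(16, Ity_I32)));
}
#endif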


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
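
/* Worked example (invented offsets).  Given the flattened input

      t1 = GET:I32(0)
      t2 = GET:I32(0)
      PUT(0) = t3
      t4 = GET:I32(0)

   the pass rewrites the second statement as 't2 = t1' (a hit on the
   binding (0,3) :-> t1).  The Put invalidates that binding and then
   installs (0,3) :-> t3, so the last statement becomes 't4 = t3'. */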


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      for (j = 0; j < env->used; j++) {
         if (!env->inuse[j])
            continue;
         if (vex_control.iropt_precise_memory_exns) {
            /* Precise exceptions required.  Flush all guest state. */
            env->inuse[j] = False;
         } else {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi ))
               env->inuse[j] = False;
         }
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores. */

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   HashHW* env = newHHW();
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Since control may not get beyond this point, we must empty
            out the set, since we can no longer claim that the next
            event for any part of the guest state is definitely a
            write. */
         vassert(isIRAtom(st->Ist.Exit.guard));
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
            vassert(isIRAtom(st->Ist.PutI.ix));
            vassert(isIRAtom(st->Ist.PutI.data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            matches this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
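
/* Worked example (invented offsets).  Scanning backwards over the
   flattened input

      PUT(0) = t1
      t2 = GET:I32(4)
      PUT(0) = t3

   the final Put installs (0,3) in the set; the Get touches only
   bytes 4..7 and so leaves that entry alone; hence when the scan
   reaches 'PUT(0) = t1', the next event for bytes 0..3 is provably a
   write and the statement is turned into a no-op.  A conditional
   exit between the two Puts would have emptied the set and inhibited
   the removal. */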


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Are both expressions simply the same IRTemp ? */
static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_RdTmp
                  && e2->tag == Iex_RdTmp
                  && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
}

static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_Const
                  && e2->tag == Iex_Const
                  && e1->Iex.Const.con->tag == Ico_U32
                  && e2->Iex.Const.con->tag == Ico_U32
                  && e1->Iex.Const.con->Ico.U32
                     == e2->Iex.Const.con->Ico.U32 );
}

/* Are both expressions either the same IRTemp or IRConst-U32s ?  If
   in doubt, say No. */
static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   switch (e1->tag) {
      case Iex_RdTmp:
         return sameIRTemps(e1, e2);
      case Iex_Const:
         return sameIcoU32s(e1, e2);
      default:
         return False;
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_CmpEQ8x8:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}


static IRExpr* fold_Expr ( IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   /* UNARY ops */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.arg->tag == Iex_Const) {
      switch (e->Iex.Unop.op) {
         case Iop_1Uto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0)));
            break;
         case Iop_1Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;
         case Iop_1Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;

         case Iop_1Sto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFF : 0)));
            break;
         case Iop_1Sto16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFF : 0)));
            break;
         case Iop_1Sto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFF : 0));
            break;
         case Iop_1Sto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
            break;

         case Iop_8Sto32: {
            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            s32 <<= 24;
            s32 >>= 24;
            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
            break;
         }
         case Iop_8Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_8Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_32to16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_64to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         case Iop_32Sto64: {
            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            s64 <<= 32;
            s64 >>= 32;
            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
            break;
         }
         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         default:
            goto unhandled;
      }
   }

   /* BINARY ops */
   if (e->tag == Iex_Binop) {
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            case Iop_MullS32: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }

            /* -- Shl -- */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int  s32a = (Int)u32a;
               Int  s32b = (Int)u32b;
               Int  r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;

            default:
               goto unhandled;
         }

      } else {

         /* other cases (identities, etc) */

         /* Shl64/Shr64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Shl32/Shr32(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or8(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or8)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or16(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or16)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or32/Add32/Max32U(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add32
              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
            x+x produces a defined least significant bit, and it seems
            simplest just to get rid of the problem by rewriting it
            out, since the opportunity to do so exists. */
         if (e->Iex.Binop.op == Iop_Add32
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl32,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add64(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add64
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl64,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add8(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add8
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl8,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else
         /* NB no Add16(t,t) case yet as no known test case exists */

         /* Or64/Add64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0xFFFFFFFF) ==> x */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0) ==> 0 */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* And32/Shl32(0,x) ==> 0 */
         if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* Or8(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or8
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or32/Max32U(0,x) ==> x */
         if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or64(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or64
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
         /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
         /* Max32U(t,t) ==> t, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_And64: case Iop_And32:
            case Iop_And16: case Iop_And8:
            case Iop_Or64: case Iop_Or32:
            case Iop_Or16: case Iop_Or8: case Iop_OrV128:
            case Iop_Max32U:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = e->Iex.Binop.arg1;
               break;
            default:
               break;
         }

         /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
         /* Sub32/64(t,t) ==> 0, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_Xor64: case Iop_Xor32:
            case Iop_Xor16: case Iop_Xor8:
            case Iop_XorV128:
            case Iop_Sub64: case Iop_Sub32:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ64:
            case Iop_CmpEQ8x8:
            case Iop_CmpEQ8x16:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

      }
   }

   /* Mux0X */
   if (e->tag == Iex_Mux0X) {
      /* is the discriminant a constant? */
      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
         Bool zero;
         /* assured us by the IR type rules */
         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
                                     ->Iex.Const.con->Ico.U8));
         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
      }
      else
      /* are the arms identical? (pretty weedy test) */
      if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
                               e->Iex.Mux0X.exprX)) {
         e2 = e->Iex.Mux0X.expr0;
      }
   }

   /* Show cases where we've found but not folded 'op(t,t)'. */
   if (0 && e == e2 && e->tag == Iex_Binop
       && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("IDENT: ");
      ppIRExpr(e); vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf("  ->  ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

  unhandled:
#  if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
#  else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
#  endif
}
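
/* Illustrative (not compiled) example: folding a flat constant
   expression.  fold_Expr allocates no temps, so it can be applied to
   a free-standing node. */
#if 0
static void example_fold ( void )
{
   IRExpr* e = IRExpr_Binop(Iop_Add32,
                            IRExpr_Const(IRConst_U32(4)),
                            IRExpr_Const(IRConst_U32(5)));
   IRExpr* f = fold_Expr(e);
   vassert(f->tag == Iex_Const
           && f->Iex.Const.con->Ico.U32 == 9);
}
#endif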


/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp:
         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
            return env[(Int)ex->Iex.RdTmp.tmp];
         } else {
            /* not bound in env */
            return ex;
         }

      case Iex_Const:
      case Iex_Get:
         return ex;

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(
                   ex->Iex.GetI.descr,
                   subst_Expr(env, ex->Iex.GetI.ix),
                   ex->Iex.GetI.bias
                );

      case Iex_Qop:
         vassert(isIRAtom(ex->Iex.Qop.arg1));
         vassert(isIRAtom(ex->Iex.Qop.arg2));
         vassert(isIRAtom(ex->Iex.Qop.arg3));
         vassert(isIRAtom(ex->Iex.Qop.arg4));
         return IRExpr_Qop(
                   ex->Iex.Qop.op,
                   subst_Expr(env, ex->Iex.Qop.arg1),
                   subst_Expr(env, ex->Iex.Qop.arg2),
                   subst_Expr(env, ex->Iex.Qop.arg3),
                   subst_Expr(env, ex->Iex.Qop.arg4)
                );

      case Iex_Triop:
         vassert(isIRAtom(ex->Iex.Triop.arg1));
         vassert(isIRAtom(ex->Iex.Triop.arg2));
         vassert(isIRAtom(ex->Iex.Triop.arg3));
         return IRExpr_Triop(
                   ex->Iex.Triop.op,
                   subst_Expr(env, ex->Iex.Triop.arg1),
                   subst_Expr(env, ex->Iex.Triop.arg2),
                   subst_Expr(env, ex->Iex.Triop.arg3)
                );

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(
                   ex->Iex.Binop.op,
                   subst_Expr(env, ex->Iex.Binop.arg1),
                   subst_Expr(env, ex->Iex.Binop.arg2)
                );

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(
                   ex->Iex.Unop.op,
                   subst_Expr(env, ex->Iex.Unop.arg)
                );

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(
                   ex->Iex.Load.end,
                   ex->Iex.Load.ty,
                   subst_Expr(env, ex->Iex.Load.addr)
                );

      case Iex_CCall: {
         Int      i;
         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; args2[i]; i++) {
            vassert(isIRAtom(args2[i]));
            args2[i] = subst_Expr(env, args2[i]);
         }
         return IRExpr_CCall(
                   ex->Iex.CCall.cee,
                   ex->Iex.CCall.retty,
                   args2
                );
      }

      case Iex_Mux0X:
         vassert(isIRAtom(ex->Iex.Mux0X.cond));
         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
         return IRExpr_Mux0X(
                   subst_Expr(env, ex->Iex.Mux0X.cond),
                   subst_Expr(env, ex->Iex.Mux0X.expr0),
                   subst_Expr(env, ex->Iex.Mux0X.exprX)
                );

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");

   }
}


/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
                   st->Ist.PutI.bias,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.data))
                );

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            vassert(isIRAtom(d2->args[i]));
            d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            vassert(fcond->Iex.Const.con->Ico.U1 == False
                    || fcond->Iex.Const.con->Ico.U1 == True);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}
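
/* Illustrative (not compiled) example: with t0 bound to 0x7:I32 in
   the environment, 'PUT(0) = t0' is rewritten as 'PUT(0) = 0x7:I32'.
   Unbound temps map to NULL and are left alone. */
#if 0
static void example_subst ( void )
{
   IRExpr* env[1] = { NULL };
   IRTemp  t0 = 0;
   IRStmt* st;
   env[t0] = IRExpr_Const(IRConst_U32(7));
   st = subst_and_fold_Stmt(env, IRStmt_Put(0, IRExpr_RdTmp(t0)));
   vassert(st->Ist.Put.data->tag == Iex_Const);
}
#endif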
If it's of the form 1979 't = const' or 't1 = t2', add it to the running environment 1980 and not to the output BB. Otherwise, add it to the output 1981 BB. Note, we choose not to propagate const when const is an 1982 F64i, so that F64i literals can be CSE'd later. This helps 1983 x86 floating point code generation. */ 1984 1985 if (st2->tag == Ist_WrTmp 1986 && st2->Ist.WrTmp.data->tag == Iex_Const 1987 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) { 1988 /* 't = const' -- add to env. 1989 The pair (IRTemp, IRExpr*) is added. */ 1990 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; 1991 } 1992 else 1993 if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) { 1994 /* 't1 = t2' -- add to env. 1995 The pair (IRTemp, IRExpr*) is added. */ 1996 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; 1997 } 1998 else { 1999 /* Not interesting, copy st2 into the output block. */ 2000 addStmtToIRSB( out, st2 ); 2001 } 2002 } 2003 2004 out->next = subst_Expr( env, in->next ); 2005 out->jumpkind = in->jumpkind; 2006 return out; 2007 } 2008 2009 2010 /*---------------------------------------------------------------*/ 2011 /*--- Dead code (t = E) removal ---*/ 2012 /*---------------------------------------------------------------*/ 2013 2014 /* As a side effect, also removes all code following an unconditional 2015 side exit. */ 2016 2017 /* The 'set' below is a Bool array indexed by IRTemp -- really just 2018 representing a set of IRTemps, one mark per temp. 2019 */ 2020 2021 inline 2022 static void addUses_Temp ( Bool* set, IRTemp tmp ) 2023 { 2024 set[(Int)tmp] = True; 2025 } 2026 2027 static void addUses_Expr ( Bool* set, IRExpr* e ) 2028 { 2029 Int i; 2030 switch (e->tag) { 2031 case Iex_GetI: 2032 addUses_Expr(set, e->Iex.GetI.ix); 2033 return; 2034 case Iex_Mux0X: 2035 addUses_Expr(set, e->Iex.Mux0X.cond); 2036 addUses_Expr(set, e->Iex.Mux0X.expr0); 2037 addUses_Expr(set, e->Iex.Mux0X.exprX); 2038 return; 2039 case Iex_CCall: 2040 for (i = 0; e->Iex.CCall.args[i]; i++) 2041 addUses_Expr(set, e->Iex.CCall.args[i]); 2042 return; 2043 case Iex_Load: 2044 addUses_Expr(set, e->Iex.Load.addr); 2045 return; 2046 case Iex_Qop: 2047 addUses_Expr(set, e->Iex.Qop.arg1); 2048 addUses_Expr(set, e->Iex.Qop.arg2); 2049 addUses_Expr(set, e->Iex.Qop.arg3); 2050 addUses_Expr(set, e->Iex.Qop.arg4); 2051 return; 2052 case Iex_Triop: 2053 addUses_Expr(set, e->Iex.Triop.arg1); 2054 addUses_Expr(set, e->Iex.Triop.arg2); 2055 addUses_Expr(set, e->Iex.Triop.arg3); 2056 return; 2057 case Iex_Binop: 2058 addUses_Expr(set, e->Iex.Binop.arg1); 2059 addUses_Expr(set, e->Iex.Binop.arg2); 2060 return; 2061 case Iex_Unop: 2062 addUses_Expr(set, e->Iex.Unop.arg); 2063 return; 2064 case Iex_RdTmp: 2065 addUses_Temp(set, e->Iex.RdTmp.tmp); 2066 return; 2067 case Iex_Const: 2068 case Iex_Get: 2069 return; 2070 default: 2071 vex_printf("\n"); 2072 ppIRExpr(e); 2073 vpanic("addUses_Expr"); 2074 } 2075 } 2076 2077 static void addUses_Stmt ( Bool* set, IRStmt* st ) 2078 { 2079 Int i; 2080 IRDirty* d; 2081 IRCAS* cas; 2082 switch (st->tag) { 2083 case Ist_AbiHint: 2084 addUses_Expr(set, st->Ist.AbiHint.base); 2085 addUses_Expr(set, st->Ist.AbiHint.nia); 2086 return; 2087 case Ist_PutI: 2088 addUses_Expr(set, st->Ist.PutI.ix); 2089 addUses_Expr(set, st->Ist.PutI.data); 2090 return; 2091 case Ist_WrTmp: 2092 addUses_Expr(set, st->Ist.WrTmp.data); 2093 return; 2094 case Ist_Put: 2095 addUses_Expr(set, st->Ist.Put.data); 2096 return; 2097 case Ist_Store: 2098 addUses_Expr(set, st->Ist.Store.addr); 2099 addUses_Expr(set,
st->Ist.Store.data); 2100 return; 2101 case Ist_CAS: 2102 cas = st->Ist.CAS.details; 2103 addUses_Expr(set, cas->addr); 2104 if (cas->expdHi) 2105 addUses_Expr(set, cas->expdHi); 2106 addUses_Expr(set, cas->expdLo); 2107 if (cas->dataHi) 2108 addUses_Expr(set, cas->dataHi); 2109 addUses_Expr(set, cas->dataLo); 2110 return; 2111 case Ist_LLSC: 2112 addUses_Expr(set, st->Ist.LLSC.addr); 2113 if (st->Ist.LLSC.storedata) 2114 addUses_Expr(set, st->Ist.LLSC.storedata); 2115 return; 2116 case Ist_Dirty: 2117 d = st->Ist.Dirty.details; 2118 if (d->mFx != Ifx_None) 2119 addUses_Expr(set, d->mAddr); 2120 addUses_Expr(set, d->guard); 2121 for (i = 0; d->args[i] != NULL; i++) 2122 addUses_Expr(set, d->args[i]); 2123 return; 2124 case Ist_NoOp: 2125 case Ist_IMark: 2126 case Ist_MBE: 2127 return; 2128 case Ist_Exit: 2129 addUses_Expr(set, st->Ist.Exit.guard); 2130 return; 2131 default: 2132 vex_printf("\n"); 2133 ppIRStmt(st); 2134 vpanic("addUses_Stmt"); 2135 } 2136 } 2137 2138 2139 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */ 2140 static Bool isZeroU1 ( IRExpr* e ) 2141 { 2142 return toBool( e->tag == Iex_Const 2143 && e->Iex.Const.con->tag == Ico_U1 2144 && e->Iex.Const.con->Ico.U1 == False ); 2145 } 2146 2147 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */ 2148 static Bool isOneU1 ( IRExpr* e ) 2149 { 2150 return toBool( e->tag == Iex_Const 2151 && e->Iex.Const.con->tag == Ico_U1 2152 && e->Iex.Const.con->Ico.U1 == True ); 2153 } 2154 2155 2156 /* Note, this destructively modifies the given IRSB. */ 2157 2158 /* Scan backwards through statements, carrying a set of IRTemps which 2159 are known to be used after the current point. On encountering 't = 2160 E', delete the binding if it is not used. Otherwise, add any temp 2161 uses to the set and keep on moving backwards. 2162 2163 As an enhancement, the first (backwards) pass searches for IR exits 2164 with always-taken conditions and notes the location of the earliest 2165 one in the block. If any such are found, a second pass copies the 2166 exit destination and jump kind to the bb-end. Then, the exit and 2167 all statements following it are turned into no-ops. 2168 */ 2169 2170 /* notstatic */ void do_deadcode_BB ( IRSB* bb ) 2171 { 2172 Int i, i_unconditional_exit; 2173 Int n_tmps = bb->tyenv->types_used; 2174 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool)); 2175 IRStmt* st; 2176 2177 for (i = 0; i < n_tmps; i++) 2178 set[i] = False; 2179 2180 /* start off by recording IRTemp uses in the next field. */ 2181 addUses_Expr(set, bb->next); 2182 2183 /* First pass */ 2184 2185 /* Work backwards through the stmts */ 2186 i_unconditional_exit = -1; 2187 for (i = bb->stmts_used-1; i >= 0; i--) { 2188 st = bb->stmts[i]; 2189 if (st->tag == Ist_NoOp) 2190 continue; 2191 /* take note of any unconditional exits */ 2192 if (st->tag == Ist_Exit 2193 && isOneU1(st->Ist.Exit.guard)) 2194 i_unconditional_exit = i; 2195 if (st->tag == Ist_WrTmp 2196 && set[(Int)(st->Ist.WrTmp.tmp)] == False) { 2197 /* it's an IRTemp which never got used. Delete it. */ 2198 if (DEBUG_IROPT) { 2199 vex_printf("DEAD: "); 2200 ppIRStmt(st); 2201 vex_printf("\n"); 2202 } 2203 bb->stmts[i] = IRStmt_NoOp(); 2204 } 2205 else 2206 if (st->tag == Ist_Dirty 2207 && st->Ist.Dirty.details->guard 2208 && isZeroU1(st->Ist.Dirty.details->guard)) { 2209 /* This is a dirty helper which will never get called. 2210 Delete it. */ 2211 bb->stmts[i] = IRStmt_NoOp(); 2212 } 2213 else { 2214 /* Note any IRTemp uses made by the current statement. 
*/ 2215 addUses_Stmt(set, st); 2216 } 2217 } 2218 2219 /* Optional second pass: if any unconditional exits were found, 2220 delete them and all following statements. */ 2221 2222 if (i_unconditional_exit != -1) { 2223 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", 2224 i_unconditional_exit); 2225 vassert(i_unconditional_exit >= 0 2226 && i_unconditional_exit < bb->stmts_used); 2227 bb->next 2228 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); 2229 bb->jumpkind 2230 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; 2231 for (i = i_unconditional_exit; i < bb->stmts_used; i++) 2232 bb->stmts[i] = IRStmt_NoOp(); 2233 } 2234 } 2235 2236 2237 /*---------------------------------------------------------------*/ 2238 /*--- Specialisation of helper function calls, in ---*/ 2239 /*--- collaboration with the front end ---*/ 2240 /*---------------------------------------------------------------*/ 2241 2242 static 2243 IRSB* spec_helpers_BB( 2244 IRSB* bb, 2245 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int) 2246 ) 2247 { 2248 Int i; 2249 IRStmt* st; 2250 IRExpr* ex; 2251 Bool any = False; 2252 2253 for (i = bb->stmts_used-1; i >= 0; i--) { 2254 st = bb->stmts[i]; 2255 2256 if (st->tag != Ist_WrTmp 2257 || st->Ist.WrTmp.data->tag != Iex_CCall) 2258 continue; 2259 2260 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, 2261 st->Ist.WrTmp.data->Iex.CCall.args, 2262 &bb->stmts[0], i ); 2263 if (!ex) 2264 /* the front end can't think of a suitable replacement */ 2265 continue; 2266 2267 /* We got something better. Install it in the bb. */ 2268 any = True; 2269 bb->stmts[i] 2270 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 2271 2272 if (0) { 2273 vex_printf("SPEC: "); 2274 ppIRExpr(st->Ist.WrTmp.data); 2275 vex_printf(" --> "); 2276 ppIRExpr(ex); 2277 vex_printf("\n"); 2278 } 2279 } 2280 2281 if (any) 2282 bb = flatten_BB(bb); 2283 return bb; 2284 } 2285 2286 2287 /*---------------------------------------------------------------*/ 2288 /*--- Determination of guest state aliasing relationships ---*/ 2289 /*---------------------------------------------------------------*/ 2290 2291 /* These are helper functions for CSE and GetI/PutI transformations. 2292 2293 Determine, to the extent possible, the relationship between two 2294 guest state accesses. The possible outcomes are: 2295 2296 * Exact alias. These two accesses denote precisely the same 2297 piece of the guest state. 2298 2299 * Definitely no alias. These two accesses are guaranteed not to 2300 overlap any part of the guest state. 2301 2302 * Unknown -- if neither of the above can be established. 2303 2304 If in doubt, return Unknown. */ 2305 2306 typedef 2307 enum { ExactAlias, NoAlias, UnknownAlias } 2308 GSAliasing; 2309 2310 2311 /* Produces the alias relation between an indexed guest 2312 state access and a non-indexed access. */ 2313 2314 static 2315 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1, 2316 Int offset2, IRType ty2 ) 2317 { 2318 UInt minoff1, maxoff1, minoff2, maxoff2; 2319 2320 getArrayBounds( descr1, &minoff1, &maxoff1 ); 2321 minoff2 = offset2; 2322 maxoff2 = minoff2 + sizeofIRType(ty2) - 1; 2323 2324 if (maxoff1 < minoff2 || maxoff2 < minoff1) 2325 return NoAlias; 2326 2327 /* Could probably do better here if required. For the moment 2328 however just claim not to know anything more. */ 2329 return UnknownAlias; 2330 } 2331 2332 2333 /* Produces the alias relation between two indexed guest state 2334 accesses. 
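   As a worked example (values purely illustrative): with identical
   descriptors describing an 8-element array, and the same index temp
   t1 in both accesses,

      (descr, t1, bias 1)  vs  (descr, t1, bias 9)

   denote exactly the same element, because biases are compared
   modulo nElems (1 == 9 mod 8), giving ExactAlias; biases 1 and 2
   would give NoAlias.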
*/ 2335 2336 static 2337 GSAliasing getAliasingRelation_II ( 2338 IRRegArray* descr1, IRExpr* ix1, Int bias1, 2339 IRRegArray* descr2, IRExpr* ix2, Int bias2 2340 ) 2341 { 2342 UInt minoff1, maxoff1, minoff2, maxoff2; 2343 Int iters; 2344 2345 /* First try hard to show they don't alias. */ 2346 getArrayBounds( descr1, &minoff1, &maxoff1 ); 2347 getArrayBounds( descr2, &minoff2, &maxoff2 ); 2348 if (maxoff1 < minoff2 || maxoff2 < minoff1) 2349 return NoAlias; 2350 2351 /* So the two arrays at least partially overlap. To get any 2352 further we'll have to be sure that the descriptors are 2353 identical. */ 2354 if (!eqIRRegArray(descr1, descr2)) 2355 return UnknownAlias; 2356 2357 /* The descriptors are identical. Now the only difference can be 2358 in the index expressions. If they cannot be shown to be 2359 identical, we have to say we don't know what the aliasing 2360 relation will be. Now, since the IR is flattened, the index 2361 expressions should be atoms -- either consts or tmps. So that 2362 makes the comparison simple. */ 2363 vassert(isIRAtom(ix1)); 2364 vassert(isIRAtom(ix2)); 2365 if (!eqIRAtom(ix1,ix2)) 2366 return UnknownAlias; 2367 2368 /* Ok, the index expressions are identical. So now the only way 2369 they can be different is in the bias. Normalise this 2370 paranoidly, to reliably establish equality/non-equality. */ 2371 2372 /* So now we know that the two accesses index the same array 2373 with the same base. Are the offsets the same, modulo the 2374 array size? Do this paranoidly. */ 2375 vassert(descr1->nElems == descr2->nElems); 2376 vassert(descr1->elemTy == descr2->elemTy); 2377 vassert(descr1->base == descr2->base); 2378 iters = 0; 2379 while (bias1 < 0 || bias2 < 0) { 2380 bias1 += descr1->nElems; 2381 bias2 += descr1->nElems; 2382 iters++; 2383 if (iters > 10) 2384 vpanic("getAliasingRelation: iters"); 2385 } 2386 vassert(bias1 >= 0 && bias2 >= 0); 2387 bias1 %= descr1->nElems; 2388 bias2 %= descr1->nElems; 2389 vassert(bias1 >= 0 && bias1 < descr1->nElems); 2390 vassert(bias2 >= 0 && bias2 < descr1->nElems); 2391 2392 /* Finally, bias1 and bias2 are normalised into the range 2393 0 .. descr1->nElems - 1. And so we can establish 2394 equality/non-equality. */ 2395 2396 return bias1==bias2 ? ExactAlias : NoAlias; 2397 } 2398 2399 2400 /*---------------------------------------------------------------*/ 2401 /*--- Common Subexpression Elimination ---*/ 2402 /*---------------------------------------------------------------*/ 2403 2404 /* Expensive in time and space.
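   Note that only the handful of flat shapes enumerated by AvailExpr
   below can participate (temp numbers hypothetical):

      unop(t1), binop(t1,t2), binop(t1,c), binop(c,t2),
      F64i literals, Mux0X(t1,t2,t3), GetI(descr,t1,bias)

   In particular, loads are never made available, so they are never
   CSEd.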
*/ 2405 2406 /* Uses two environments: 2407 a IRTemp -> IRTemp mapping 2408 a mapping from AvailExpr* to IRTemp 2409 */ 2410 2411 typedef 2412 struct { 2413 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag; 2414 union { 2415 /* unop(tmp) */ 2416 struct { 2417 IROp op; 2418 IRTemp arg; 2419 } Ut; 2420 /* binop(tmp,tmp) */ 2421 struct { 2422 IROp op; 2423 IRTemp arg1; 2424 IRTemp arg2; 2425 } Btt; 2426 /* binop(tmp,const) */ 2427 struct { 2428 IROp op; 2429 IRTemp arg1; 2430 IRConst con2; 2431 } Btc; 2432 /* binop(const,tmp) */ 2433 struct { 2434 IROp op; 2435 IRConst con1; 2436 IRTemp arg2; 2437 } Bct; 2438 /* F64i-style const */ 2439 struct { 2440 ULong f64i; 2441 } Cf64i; 2442 /* Mux0X(tmp,tmp,tmp) */ 2443 struct { 2444 IRTemp co; 2445 IRTemp e0; 2446 IRTemp eX; 2447 } Mttt; 2448 /* GetI(descr,tmp,bias)*/ 2449 struct { 2450 IRRegArray* descr; 2451 IRTemp ix; 2452 Int bias; 2453 } GetIt; 2454 } u; 2455 } 2456 AvailExpr; 2457 2458 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 ) 2459 { 2460 if (a1->tag != a2->tag) 2461 return False; 2462 switch (a1->tag) { 2463 case Ut: 2464 return toBool( 2465 a1->u.Ut.op == a2->u.Ut.op 2466 && a1->u.Ut.arg == a2->u.Ut.arg); 2467 case Btt: 2468 return toBool( 2469 a1->u.Btt.op == a2->u.Btt.op 2470 && a1->u.Btt.arg1 == a2->u.Btt.arg1 2471 && a1->u.Btt.arg2 == a2->u.Btt.arg2); 2472 case Btc: 2473 return toBool( 2474 a1->u.Btc.op == a2->u.Btc.op 2475 && a1->u.Btc.arg1 == a2->u.Btc.arg1 2476 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2)); 2477 case Bct: 2478 return toBool( 2479 a1->u.Bct.op == a2->u.Bct.op 2480 && a1->u.Bct.arg2 == a2->u.Bct.arg2 2481 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1)); 2482 case Cf64i: 2483 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i); 2484 case Mttt: 2485 return toBool(a1->u.Mttt.co == a2->u.Mttt.co 2486 && a1->u.Mttt.e0 == a2->u.Mttt.e0 2487 && a1->u.Mttt.eX == a2->u.Mttt.eX); 2488 case GetIt: 2489 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 2490 && a1->u.GetIt.ix == a2->u.GetIt.ix 2491 && a1->u.GetIt.bias == a2->u.GetIt.bias); 2492 default: vpanic("eq_AvailExpr"); 2493 } 2494 } 2495 2496 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 2497 { 2498 IRConst* con; 2499 switch (ae->tag) { 2500 case Ut: 2501 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) ); 2502 case Btt: 2503 return IRExpr_Binop( ae->u.Btt.op, 2504 IRExpr_RdTmp(ae->u.Btt.arg1), 2505 IRExpr_RdTmp(ae->u.Btt.arg2) ); 2506 case Btc: 2507 con = LibVEX_Alloc(sizeof(IRConst)); 2508 *con = ae->u.Btc.con2; 2509 return IRExpr_Binop( ae->u.Btc.op, 2510 IRExpr_RdTmp(ae->u.Btc.arg1), 2511 IRExpr_Const(con) ); 2512 case Bct: 2513 con = LibVEX_Alloc(sizeof(IRConst)); 2514 *con = ae->u.Bct.con1; 2515 return IRExpr_Binop( ae->u.Bct.op, 2516 IRExpr_Const(con), 2517 IRExpr_RdTmp(ae->u.Bct.arg2) ); 2518 case Cf64i: 2519 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i)); 2520 case Mttt: 2521 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co), 2522 IRExpr_RdTmp(ae->u.Mttt.e0), 2523 IRExpr_RdTmp(ae->u.Mttt.eX)); 2524 case GetIt: 2525 return IRExpr_GetI(ae->u.GetIt.descr, 2526 IRExpr_RdTmp(ae->u.GetIt.ix), 2527 ae->u.GetIt.bias); 2528 default: 2529 vpanic("availExpr_to_IRExpr"); 2530 } 2531 } 2532 2533 inline 2534 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp ) 2535 { 2536 HWord res; 2537 /* env :: IRTemp -> IRTemp */ 2538 if (lookupHHW( env, &res, (HWord)tmp )) 2539 return (IRTemp)res; 2540 else 2541 return tmp; 2542 } 2543 2544 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae ) 2545 { 2546 /* env :: IRTemp 
-> IRTemp */ 2547 switch (ae->tag) { 2548 case Ut: 2549 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg ); 2550 break; 2551 case Btt: 2552 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 ); 2553 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 ); 2554 break; 2555 case Btc: 2556 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 ); 2557 break; 2558 case Bct: 2559 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 ); 2560 break; 2561 case Cf64i: 2562 break; 2563 case Mttt: 2564 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co ); 2565 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 ); 2566 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX ); 2567 break; 2568 case GetIt: 2569 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix ); 2570 break; 2571 default: 2572 vpanic("subst_AvailExpr"); 2573 } 2574 } 2575 2576 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e ) 2577 { 2578 AvailExpr* ae; 2579 2580 if (e->tag == Iex_Unop 2581 && e->Iex.Unop.arg->tag == Iex_RdTmp) { 2582 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2583 ae->tag = Ut; 2584 ae->u.Ut.op = e->Iex.Unop.op; 2585 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp; 2586 return ae; 2587 } 2588 2589 if (e->tag == Iex_Binop 2590 && e->Iex.Binop.arg1->tag == Iex_RdTmp 2591 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 2592 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2593 ae->tag = Btt; 2594 ae->u.Btt.op = e->Iex.Binop.op; 2595 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 2596 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 2597 return ae; 2598 } 2599 2600 if (e->tag == Iex_Binop 2601 && e->Iex.Binop.arg1->tag == Iex_RdTmp 2602 && e->Iex.Binop.arg2->tag == Iex_Const) { 2603 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2604 ae->tag = Btc; 2605 ae->u.Btc.op = e->Iex.Binop.op; 2606 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 2607 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con); 2608 return ae; 2609 } 2610 2611 if (e->tag == Iex_Binop 2612 && e->Iex.Binop.arg1->tag == Iex_Const 2613 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 2614 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2615 ae->tag = Bct; 2616 ae->u.Bct.op = e->Iex.Binop.op; 2617 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 2618 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 2619 return ae; 2620 } 2621 2622 if (e->tag == Iex_Const 2623 && e->Iex.Const.con->tag == Ico_F64i) { 2624 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2625 ae->tag = Cf64i; 2626 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 2627 return ae; 2628 } 2629 2630 if (e->tag == Iex_Mux0X 2631 && e->Iex.Mux0X.cond->tag == Iex_RdTmp 2632 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp 2633 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) { 2634 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2635 ae->tag = Mttt; 2636 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp; 2637 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp; 2638 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp; 2639 return ae; 2640 } 2641 2642 if (e->tag == Iex_GetI 2643 && e->Iex.GetI.ix->tag == Iex_RdTmp) { 2644 ae = LibVEX_Alloc(sizeof(AvailExpr)); 2645 ae->tag = GetIt; 2646 ae->u.GetIt.descr = e->Iex.GetI.descr; 2647 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 2648 ae->u.GetIt.bias = e->Iex.GetI.bias; 2649 return ae; 2650 } 2651 2652 return NULL; 2653 } 2654 2655 2656 /* The BB is modified in-place. Returns True if any changes were 2657 made. 
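   A sketch of the intended effect (temp numbering hypothetical):

      t3 = Add32(t1,t2)         t3 = Add32(t1,t2)
      t4 = Add32(t1,t2)  -->    t4 = t3
      t5 = Sub32(t4,t9)         t5 = Sub32(t3,t9)

   The second stmt is rewritten via the AvailExpr* -> IRTemp map, and
   the resulting t4 -> t3 entry in the IRTemp -> IRTemp map then
   renames t4 to t3 in the third.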
*/ 2658 2659 static Bool do_cse_BB ( IRSB* bb ) 2660 { 2661 Int i, j, paranoia; 2662 IRTemp t, q; 2663 IRStmt* st; 2664 AvailExpr* eprime; 2665 AvailExpr* ae; 2666 Bool invalidate; 2667 Bool anyDone = False; 2668 2669 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */ 2670 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */ 2671 2672 vassert(sizeof(IRTemp) <= sizeof(HWord)); 2673 2674 if (0) { ppIRSB(bb); vex_printf("\n\n"); } 2675 2676 /* Iterate forwards over the stmts. 2677 On seeing "t = E", where E is one of the 7 AvailExpr forms: 2678 let E' = apply tenv substitution to E 2679 search aenv for E' 2680 if a mapping E' -> q is found, 2681 replace this stmt by "t = q" 2682 and add binding t -> q to tenv 2683 else 2684 add binding E' -> t to aenv 2685 replace this stmt by "t = E'" 2686 2687 Other statements are only interesting to the extent that they 2688 might invalidate some of the expressions in aenv. So there is 2689 an invalidate-bindings check for each statement seen. 2690 */ 2691 for (i = 0; i < bb->stmts_used; i++) { 2692 st = bb->stmts[i]; 2693 2694 /* ------ BEGIN invalidate aenv bindings ------ */ 2695 /* This is critical: remove from aenv any E' -> .. bindings 2696 which might be invalidated by this statement. The only 2697 vulnerable kind of binding is the GetI kind. 2698 Dirty call - dump (paranoia level -> 2) 2699 Store - dump (ditto) 2700 Put, PutI - dump unless no-overlap is proven (.. -> 1) 2701 Uses getAliasingRelation_IC and getAliasingRelation_II 2702 to do the no-overlap assessments needed for Put/PutI. 2703 */ 2704 switch (st->tag) { 2705 case Ist_Dirty: case Ist_Store: case Ist_MBE: 2706 case Ist_CAS: case Ist_LLSC: 2707 paranoia = 2; break; 2708 case Ist_Put: case Ist_PutI: 2709 paranoia = 1; break; 2710 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: 2711 case Ist_WrTmp: case Ist_Exit: 2712 paranoia = 0; break; 2713 default: 2714 vpanic("do_cse_BB(1)"); 2715 } 2716 2717 if (paranoia > 0) { 2718 for (j = 0; j < aenv->used; j++) { 2719 if (!aenv->inuse[j]) 2720 continue; 2721 ae = (AvailExpr*)aenv->key[j]; 2722 if (ae->tag != GetIt) 2723 continue; 2724 invalidate = False; 2725 if (paranoia >= 2) { 2726 invalidate = True; 2727 } else { 2728 vassert(paranoia == 1); 2729 if (st->tag == Ist_Put) { 2730 if (getAliasingRelation_IC( 2731 ae->u.GetIt.descr, 2732 IRExpr_RdTmp(ae->u.GetIt.ix), 2733 st->Ist.Put.offset, 2734 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) 2735 ) != NoAlias) 2736 invalidate = True; 2737 } 2738 else 2739 if (st->tag == Ist_PutI) { 2740 if (getAliasingRelation_II( 2741 ae->u.GetIt.descr, 2742 IRExpr_RdTmp(ae->u.GetIt.ix), 2743 ae->u.GetIt.bias, 2744 st->Ist.PutI.descr, 2745 st->Ist.PutI.ix, 2746 st->Ist.PutI.bias 2747 ) != NoAlias) 2748 invalidate = True; 2749 } 2750 else 2751 vpanic("do_cse_BB(2)"); 2752 } 2753 2754 if (invalidate) { 2755 aenv->inuse[j] = False; 2756 aenv->key[j] = (HWord)NULL; /* be sure */ 2757 } 2758 } /* for j */ 2759 } /* paranoia > 0 */ 2760 2761 /* ------ END invalidate aenv bindings ------ */ 2762 2763 /* ignore not-interestings */ 2764 if (st->tag != Ist_WrTmp) 2765 continue; 2766 2767 t = st->Ist.WrTmp.tmp; 2768 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data); 2769 /* ignore if not of AvailExpr form */ 2770 if (!eprime) 2771 continue; 2772 2773 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */ 2774 2775 /* apply tenv */ 2776 subst_AvailExpr( tenv, eprime ); 2777 2778 /* search aenv for eprime, unfortunately the hard way */ 2779 for (j = 0; j < aenv->used; j++) 2780 if (aenv->inuse[j] &&
eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j])) 2781 break; 2782 2783 if (j < aenv->used) { 2784 /* A binding E' -> q was found. Replace stmt by "t = q" and 2785 note the t->q binding in tenv. */ 2786 /* (this is the core of the CSE action) */ 2787 q = (IRTemp)aenv->val[j]; 2788 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) ); 2789 addToHHW( tenv, (HWord)t, (HWord)q ); 2790 anyDone = True; 2791 } else { 2792 /* No binding was found, so instead we add E' -> t to our 2793 collection of available expressions, replace this stmt 2794 with "t = E'", and move on. */ 2795 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) ); 2796 addToHHW( aenv, (HWord)eprime, (HWord)t ); 2797 } 2798 } 2799 2800 /* 2801 ppIRSB(bb); 2802 sanityCheckIRSB(bb, Ity_I32); 2803 vex_printf("\n\n"); 2804 */ 2805 return anyDone; 2806 } 2807 2808 2809 /*---------------------------------------------------------------*/ 2810 /*--- Add32/Sub32 chain collapsing ---*/ 2811 /*---------------------------------------------------------------*/ 2812 2813 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */ 2814 2815 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If 2816 yes, set *tmp and *i32 appropriately. *i32 is set as if the 2817 root node is Add32, not Sub32. */ 2818 2819 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 ) 2820 { 2821 if (e->tag != Iex_Binop) 2822 return False; 2823 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32) 2824 return False; 2825 if (e->Iex.Binop.arg1->tag != Iex_RdTmp) 2826 return False; 2827 if (e->Iex.Binop.arg2->tag != Iex_Const) 2828 return False; 2829 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 2830 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32); 2831 if (e->Iex.Binop.op == Iop_Sub32) 2832 *i32 = -*i32; 2833 return True; 2834 } 2835 2836 2837 /* Figure out if tmp can be expressed as tmp2 +32 const, for some 2838 other tmp2. Scan backwards from the specified start point -- an 2839 optimisation. */ 2840 2841 static Bool collapseChain ( IRSB* bb, Int startHere, 2842 IRTemp tmp, 2843 IRTemp* tmp2, Int* i32 ) 2844 { 2845 Int j, ii; 2846 IRTemp vv; 2847 IRStmt* st; 2848 IRExpr* e; 2849 2850 /* the (var, con) pair contain the current 'representation' for 2851 'tmp'. We start with 'tmp + 0'. */ 2852 IRTemp var = tmp; 2853 Int con = 0; 2854 2855 /* Scan backwards to see if tmp can be replaced by some other tmp 2856 +/- a constant. */ 2857 for (j = startHere; j >= 0; j--) { 2858 st = bb->stmts[j]; 2859 if (st->tag != Ist_WrTmp) 2860 continue; 2861 if (st->Ist.WrTmp.tmp != var) 2862 continue; 2863 e = st->Ist.WrTmp.data; 2864 if (!isAdd32OrSub32(e, &vv, &ii)) 2865 break; 2866 var = vv; 2867 con += ii; 2868 } 2869 if (j == -1) 2870 /* no earlier binding for var .. ill-formed IR */ 2871 vpanic("collapseChain"); 2872 2873 /* so, did we find anything interesting? */ 2874 if (var == tmp) 2875 return False; /* no .. */ 2876 2877 *tmp2 = var; 2878 *i32 = con; 2879 return True; 2880 } 2881 2882 2883 /* ------- Main function for Add32/Sub32 chain collapsing ------ */ 2884 2885 static void collapse_AddSub_chains_BB ( IRSB* bb ) 2886 { 2887 IRStmt *st; 2888 IRTemp var, var2; 2889 Int i, con, con2; 2890 2891 for (i = bb->stmts_used-1; i >= 0; i--) { 2892 st = bb->stmts[i]; 2893 if (st->tag == Ist_NoOp) 2894 continue; 2895 2896 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */ 2897 2898 if (st->tag == Ist_WrTmp 2899 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) { 2900 2901 /* So e1 is of the form Add32(var,con) or Sub32(var,-con). 
2902 Find out if var can be expressed as var2 + con2. */ 2903 if (collapseChain(bb, i-1, var, &var2, &con2)) { 2904 if (DEBUG_IROPT) { 2905 vex_printf("replacing1 "); 2906 ppIRStmt(st); 2907 vex_printf(" with "); 2908 } 2909 con2 += con; 2910 bb->stmts[i] 2911 = IRStmt_WrTmp( 2912 st->Ist.WrTmp.tmp, 2913 (con2 >= 0) 2914 ? IRExpr_Binop(Iop_Add32, 2915 IRExpr_RdTmp(var2), 2916 IRExpr_Const(IRConst_U32(con2))) 2917 : IRExpr_Binop(Iop_Sub32, 2918 IRExpr_RdTmp(var2), 2919 IRExpr_Const(IRConst_U32(-con2))) 2920 ); 2921 if (DEBUG_IROPT) { 2922 ppIRStmt(bb->stmts[i]); 2923 vex_printf("\n"); 2924 } 2925 } 2926 2927 continue; 2928 } 2929 2930 /* Try to collapse 't1 = GetI[t2, con]'. */ 2931 2932 if (st->tag == Ist_WrTmp 2933 && st->Ist.WrTmp.data->tag == Iex_GetI 2934 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp 2935 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix 2936 ->Iex.RdTmp.tmp, &var2, &con2)) { 2937 if (DEBUG_IROPT) { 2938 vex_printf("replacing3 "); 2939 ppIRStmt(st); 2940 vex_printf(" with "); 2941 } 2942 con2 += st->Ist.WrTmp.data->Iex.GetI.bias; 2943 bb->stmts[i] 2944 = IRStmt_WrTmp( 2945 st->Ist.WrTmp.tmp, 2946 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr, 2947 IRExpr_RdTmp(var2), 2948 con2)); 2949 if (DEBUG_IROPT) { 2950 ppIRStmt(bb->stmts[i]); 2951 vex_printf("\n"); 2952 } 2953 continue; 2954 } 2955 2956 /* Perhaps st is PutI[t, con] ? */ 2957 2958 if (st->tag == Ist_PutI 2959 && st->Ist.PutI.ix->tag == Iex_RdTmp 2960 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp, 2961 &var2, &con2)) { 2962 if (DEBUG_IROPT) { 2963 vex_printf("replacing2 "); 2964 ppIRStmt(st); 2965 vex_printf(" with "); 2966 } 2967 con2 += st->Ist.PutI.bias; 2968 bb->stmts[i] 2969 = IRStmt_PutI(st->Ist.PutI.descr, 2970 IRExpr_RdTmp(var2), 2971 con2, 2972 st->Ist.PutI.data); 2973 if (DEBUG_IROPT) { 2974 ppIRStmt(bb->stmts[i]); 2975 vex_printf("\n"); 2976 } 2977 continue; 2978 } 2979 2980 } /* for */ 2981 } 2982 2983 2984 /*---------------------------------------------------------------*/ 2985 /*--- PutI/GetI transformations ---*/ 2986 /*---------------------------------------------------------------*/ 2987 2988 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from 2989 the given starting point to find, if any, a PutI which writes 2990 exactly the same piece of guest state, and so return the expression 2991 that the PutI writes. This is the core of PutI-GetI forwarding. */ 2992 2993 static 2994 IRExpr* findPutI ( IRSB* bb, Int startHere, 2995 IRRegArray* descrG, IRExpr* ixG, Int biasG ) 2996 { 2997 Int j; 2998 IRStmt* st; 2999 GSAliasing relation; 3000 3001 if (0) { 3002 vex_printf("\nfindPutI "); 3003 ppIRRegArray(descrG); 3004 vex_printf(" "); 3005 ppIRExpr(ixG); 3006 vex_printf(" %d\n", biasG); 3007 } 3008 3009 /* Scan backwards in bb from startHere to find a suitable PutI 3010 binding for (descrG, ixG, biasG), if any. */ 3011 3012 for (j = startHere; j >= 0; j--) { 3013 st = bb->stmts[j]; 3014 if (st->tag == Ist_NoOp) 3015 continue; 3016 3017 if (st->tag == Ist_Put) { 3018 /* Non-indexed Put. This can't give a binding, but we do 3019 need to check it doesn't invalidate the search by 3020 overlapping any part of the indexed guest state. 
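   For example (offsets hypothetical): if descrG covers guest offsets
   136 .. 199, a Put at offset 200 is harmless (NoAlias) and the scan
   can continue, whereas a Put at offset 190 might write part of the
   area the GetI reads, and so forces the search to be abandoned.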
*/ 3021 3022 relation 3023 = getAliasingRelation_IC( 3024 descrG, ixG, 3025 st->Ist.Put.offset, 3026 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 3027 3028 if (relation == NoAlias) { 3029 /* we're OK; keep going */ 3030 continue; 3031 } else { 3032 /* relation == UnknownAlias || relation == ExactAlias */ 3033 /* If this assertion fails, we've found a Put which writes 3034 an area of guest state which is read by a GetI. Which 3035 is unlikely (although not per se wrong). */ 3036 vassert(relation != ExactAlias); 3037 /* This Put potentially writes guest state that the GetI 3038 reads; we must fail. */ 3039 return NULL; 3040 } 3041 } 3042 3043 if (st->tag == Ist_PutI) { 3044 3045 relation = getAliasingRelation_II( 3046 descrG, ixG, biasG, 3047 st->Ist.PutI.descr, 3048 st->Ist.PutI.ix, 3049 st->Ist.PutI.bias 3050 ); 3051 3052 if (relation == NoAlias) { 3053 /* This PutI definitely doesn't overlap. Ignore it and 3054 keep going. */ 3055 continue; /* the for j loop */ 3056 } 3057 3058 if (relation == UnknownAlias) { 3059 /* We don't know whether this PutI writes to the same 3060 guest state as the GetI reads. So we have to give up. */ 3061 return NULL; 3062 } 3063 3064 /* Otherwise, we've found what we're looking for. */ 3065 vassert(relation == ExactAlias); 3066 return st->Ist.PutI.data; 3067 3068 } /* if (st->tag == Ist_PutI) */ 3069 3070 if (st->tag == Ist_Dirty) { 3071 /* Be conservative. If the dirty call has any guest effects at 3072 all, give up. We could do better -- only give up if there 3073 are any guest writes/modifies. */ 3074 if (st->Ist.Dirty.details->nFxState > 0) 3075 return NULL; 3076 } 3077 3078 } /* for */ 3079 3080 /* No valid replacement was found. */ 3081 return NULL; 3082 } 3083 3084 3085 3086 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense 3087 that it writes exactly the same piece of guest state) ? Safe 3088 answer: False. */ 3089 3090 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 ) 3091 { 3092 vassert(pi->tag == Ist_PutI); 3093 if (s2->tag != Ist_PutI) 3094 return False; 3095 3096 return toBool( 3097 getAliasingRelation_II( 3098 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias, 3099 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias 3100 ) 3101 == ExactAlias 3102 ); 3103 } 3104 3105 3106 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might 3107 overlap it? Safe answer: True. Note, we could do a lot better 3108 than this if needed. */ 3109 3110 static 3111 Bool guestAccessWhichMightOverlapPutI ( 3112 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2 3113 ) 3114 { 3115 GSAliasing relation; 3116 UInt minoffP, maxoffP; 3117 3118 vassert(pi->tag == Ist_PutI); 3119 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP); 3120 switch (s2->tag) { 3121 3122 case Ist_NoOp: 3123 case Ist_IMark: 3124 return False; 3125 3126 case Ist_MBE: 3127 case Ist_AbiHint: 3128 /* just be paranoid ... these should be rare. */ 3129 return True; 3130 3131 case Ist_CAS: 3132 /* This is unbelievably lame, but it's probably not 3133 significant from a performance point of view. Really, a 3134 CAS is a load-store op, so it should be safe to say False. 3135 However .. */ 3136 return True; 3137 3138 case Ist_Dirty: 3139 /* If the dirty call has any guest effects at all, give up. 3140 Probably could do better.
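   Note that nFxState counts declared reads, writes and modifies
   alike, so even a helper which merely reads some guest state gets
   the conservative True answer here.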
*/ 3141 if (s2->Ist.Dirty.details->nFxState > 0) 3142 return True; 3143 return False; 3144 3145 case Ist_Put: 3146 vassert(isIRAtom(s2->Ist.Put.data)); 3147 relation 3148 = getAliasingRelation_IC( 3149 pi->Ist.PutI.descr, pi->Ist.PutI.ix, 3150 s2->Ist.Put.offset, 3151 typeOfIRExpr(tyenv,s2->Ist.Put.data) 3152 ); 3153 goto have_relation; 3154 3155 case Ist_PutI: 3156 vassert(isIRAtom(s2->Ist.PutI.ix)); 3157 vassert(isIRAtom(s2->Ist.PutI.data)); 3158 relation 3159 = getAliasingRelation_II( 3160 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias, 3161 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias 3162 ); 3163 goto have_relation; 3164 3165 case Ist_WrTmp: 3166 if (s2->Ist.WrTmp.data->tag == Iex_GetI) { 3167 relation 3168 = getAliasingRelation_II( 3169 pi->Ist.PutI.descr, pi->Ist.PutI.ix, 3170 pi->Ist.PutI.bias, 3171 s2->Ist.WrTmp.data->Iex.GetI.descr, 3172 s2->Ist.WrTmp.data->Iex.GetI.ix, 3173 s2->Ist.WrTmp.data->Iex.GetI.bias 3174 ); 3175 goto have_relation; 3176 } 3177 if (s2->Ist.WrTmp.data->tag == Iex_Get) { 3178 relation 3179 = getAliasingRelation_IC( 3180 pi->Ist.PutI.descr, pi->Ist.PutI.ix, 3181 s2->Ist.WrTmp.data->Iex.Get.offset, 3182 s2->Ist.WrTmp.data->Iex.Get.ty 3183 ); 3184 goto have_relation; 3185 } 3186 return False; 3187 3188 case Ist_Store: 3189 vassert(isIRAtom(s2->Ist.Store.addr)); 3190 vassert(isIRAtom(s2->Ist.Store.data)); 3191 return False; 3192 3193 default: 3194 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n"); 3195 vpanic("guestAccessWhichMightOverlapPutI"); 3196 } 3197 3198 have_relation: 3199 if (relation == NoAlias) 3200 return False; 3201 else 3202 return True; /* ExactAlias or UnknownAlias */ 3203 } 3204 3205 3206 3207 /* ---------- PutI/GetI transformations main functions --------- */ 3208 3209 /* Remove redundant GetIs, to the extent that they can be detected. 3210 bb is modified in-place. */ 3211 3212 static 3213 void do_redundant_GetI_elimination ( IRSB* bb ) 3214 { 3215 Int i; 3216 IRStmt* st; 3217 3218 for (i = bb->stmts_used-1; i >= 0; i--) { 3219 st = bb->stmts[i]; 3220 if (st->tag == Ist_NoOp) 3221 continue; 3222 3223 if (st->tag == Ist_WrTmp 3224 && st->Ist.WrTmp.data->tag == Iex_GetI 3225 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) { 3226 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr; 3227 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix; 3228 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias; 3229 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias); 3230 if (replacement 3231 && isIRAtom(replacement) 3232 /* Make sure we're doing a type-safe transformation! */ 3233 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) { 3234 if (DEBUG_IROPT) { 3235 vex_printf("rGI: "); 3236 ppIRExpr(st->Ist.WrTmp.data); 3237 vex_printf(" -> "); 3238 ppIRExpr(replacement); 3239 vex_printf("\n"); 3240 } 3241 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement); 3242 } 3243 } 3244 } 3245 3246 } 3247 3248 3249 /* Remove redundant PutIs, to the extent that they can be detected. 3250 bb is modified in-place. */ 3251 3252 static 3253 void do_redundant_PutI_elimination ( IRSB* bb ) 3254 { 3255 Int i, j; 3256 Bool delete; 3257 IRStmt *st, *stj; 3258 3259 for (i = 0; i < bb->stmts_used; i++) { 3260 st = bb->stmts[i]; 3261 if (st->tag != Ist_PutI) 3262 continue; 3263 /* Ok, search forwards from here to see if we can find another 3264 PutI which makes this one redundant, while dodging various 3265 hazards. Search forwards: 3266 * If conditional exit, give up (because anything after that 3267 does not postdominate this put).
3268 * If a Get which might overlap, give up (because this PutI is 3269 not necessarily dead). 3270 * If a Put which is identical, stop with success. 3271 * If a Put which might overlap, but is not identical, give up. 3272 * If a dirty helper call which might write guest state, give up. 3273 * If a Put which definitely doesn't overlap, or any other 3274 kind of stmt, continue. 3275 */ 3276 delete = False; 3277 for (j = i+1; j < bb->stmts_used; j++) { 3278 stj = bb->stmts[j]; 3279 if (stj->tag == Ist_NoOp) 3280 continue; 3281 if (identicalPutIs(st, stj)) { 3282 /* success! */ 3283 delete = True; 3284 break; 3285 } 3286 if (stj->tag == Ist_Exit) 3287 /* give up */ 3288 break; 3289 if (stj->tag == Ist_Dirty) 3290 /* give up; could do better here */ 3291 break; 3292 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj)) 3293 /* give up */ 3294 break; 3295 } 3296 3297 if (delete) { 3298 if (DEBUG_IROPT) { 3299 vex_printf("rPI: "); 3300 ppIRStmt(st); 3301 vex_printf("\n"); 3302 } 3303 bb->stmts[i] = IRStmt_NoOp(); 3304 } 3305 3306 } 3307 } 3308 3309 3310 /*---------------------------------------------------------------*/ 3311 /*--- Loop unrolling ---*/ 3312 /*---------------------------------------------------------------*/ 3313 3314 /* Adjust all tmp values (names) in e by delta. e is destructively 3315 modified. */ 3316 3317 static void deltaIRExpr ( IRExpr* e, Int delta ) 3318 { 3319 Int i; 3320 switch (e->tag) { 3321 case Iex_RdTmp: 3322 e->Iex.RdTmp.tmp += delta; 3323 break; 3324 case Iex_Get: 3325 case Iex_Const: 3326 break; 3327 case Iex_GetI: 3328 deltaIRExpr(e->Iex.GetI.ix, delta); 3329 break; 3330 case Iex_Qop: 3331 deltaIRExpr(e->Iex.Qop.arg1, delta); 3332 deltaIRExpr(e->Iex.Qop.arg2, delta); 3333 deltaIRExpr(e->Iex.Qop.arg3, delta); 3334 deltaIRExpr(e->Iex.Qop.arg4, delta); 3335 break; 3336 case Iex_Triop: 3337 deltaIRExpr(e->Iex.Triop.arg1, delta); 3338 deltaIRExpr(e->Iex.Triop.arg2, delta); 3339 deltaIRExpr(e->Iex.Triop.arg3, delta); 3340 break; 3341 case Iex_Binop: 3342 deltaIRExpr(e->Iex.Binop.arg1, delta); 3343 deltaIRExpr(e->Iex.Binop.arg2, delta); 3344 break; 3345 case Iex_Unop: 3346 deltaIRExpr(e->Iex.Unop.arg, delta); 3347 break; 3348 case Iex_Load: 3349 deltaIRExpr(e->Iex.Load.addr, delta); 3350 break; 3351 case Iex_CCall: 3352 for (i = 0; e->Iex.CCall.args[i]; i++) 3353 deltaIRExpr(e->Iex.CCall.args[i], delta); 3354 break; 3355 case Iex_Mux0X: 3356 deltaIRExpr(e->Iex.Mux0X.cond, delta); 3357 deltaIRExpr(e->Iex.Mux0X.expr0, delta); 3358 deltaIRExpr(e->Iex.Mux0X.exprX, delta); 3359 break; 3360 default: 3361 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3362 vpanic("deltaIRExpr"); 3363 } 3364 } 3365 3366 /* Adjust all tmp values (names) in st by delta. st is destructively 3367 modified.
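   For example, with delta == 100 (numbering hypothetical),

      t3 = Add32(t1,t2)   becomes   t103 = Add32(t101,t102)

   which is what gives each unrolled copy of a loop body its own
   private set of temps.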
*/ 3368 3369 static void deltaIRStmt ( IRStmt* st, Int delta ) 3370 { 3371 Int i; 3372 IRDirty* d; 3373 switch (st->tag) { 3374 case Ist_NoOp: 3375 case Ist_IMark: 3376 case Ist_MBE: 3377 break; 3378 case Ist_AbiHint: 3379 deltaIRExpr(st->Ist.AbiHint.base, delta); 3380 deltaIRExpr(st->Ist.AbiHint.nia, delta); 3381 break; 3382 case Ist_Put: 3383 deltaIRExpr(st->Ist.Put.data, delta); 3384 break; 3385 case Ist_PutI: 3386 deltaIRExpr(st->Ist.PutI.ix, delta); 3387 deltaIRExpr(st->Ist.PutI.data, delta); 3388 break; 3389 case Ist_WrTmp: 3390 st->Ist.WrTmp.tmp += delta; 3391 deltaIRExpr(st->Ist.WrTmp.data, delta); 3392 break; 3393 case Ist_Exit: 3394 deltaIRExpr(st->Ist.Exit.guard, delta); 3395 break; 3396 case Ist_Store: 3397 deltaIRExpr(st->Ist.Store.addr, delta); 3398 deltaIRExpr(st->Ist.Store.data, delta); 3399 break; 3400 case Ist_CAS: 3401 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID) 3402 st->Ist.CAS.details->oldHi += delta; 3403 st->Ist.CAS.details->oldLo += delta; 3404 deltaIRExpr(st->Ist.CAS.details->addr, delta); 3405 if (st->Ist.CAS.details->expdHi) 3406 deltaIRExpr(st->Ist.CAS.details->expdHi, delta); 3407 deltaIRExpr(st->Ist.CAS.details->expdLo, delta); 3408 if (st->Ist.CAS.details->dataHi) 3409 deltaIRExpr(st->Ist.CAS.details->dataHi, delta); 3410 deltaIRExpr(st->Ist.CAS.details->dataLo, delta); 3411 break; 3412 case Ist_LLSC: 3413 st->Ist.LLSC.result += delta; 3414 deltaIRExpr(st->Ist.LLSC.addr, delta); 3415 if (st->Ist.LLSC.storedata) 3416 deltaIRExpr(st->Ist.LLSC.storedata, delta); 3417 break; 3418 case Ist_Dirty: 3419 d = st->Ist.Dirty.details; 3420 deltaIRExpr(d->guard, delta); 3421 for (i = 0; d->args[i]; i++) 3422 deltaIRExpr(d->args[i], delta); 3423 if (d->tmp != IRTemp_INVALID) 3424 d->tmp += delta; 3425 if (d->mAddr) 3426 deltaIRExpr(d->mAddr, delta); 3427 break; 3428 default: 3429 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 3430 vpanic("deltaIRStmt"); 3431 } 3432 } 3433 3434 3435 /* If possible, return a loop-unrolled version of bb0. The original 3436 is changed. If not possible, return NULL. */ 3437 3438 /* The two schemas considered are: 3439 3440 X: BODY; goto X 3441 3442 which unrolls to (eg) X: BODY;BODY; goto X 3443 3444 and 3445 3446 X: BODY; if (c) goto X; goto Y 3447 which trivially transforms to 3448 X: BODY; if (!c) goto Y; goto X; 3449 so it falls in the scope of the first case. 3450 3451 X and Y must be literal (guest) addresses. 
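   As a sketch, the second schema

      X: BODY; if (c) goto X; goto Y

   is rewritten, by negating the exit guard and exchanging the two
   targets, into

      X: BODY; if (!c) goto Y; goto X

   and a 2 x unroll of that then yields

      X: BODY; if (!c) goto Y; BODY; if (!c) goto Y; goto X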
3452 */ 3453 3454 static Int calc_unroll_factor( IRSB* bb ) 3455 { 3456 Int n_stmts, i; 3457 3458 n_stmts = 0; 3459 for (i = 0; i < bb->stmts_used; i++) { 3460 if (bb->stmts[i]->tag != Ist_NoOp) 3461 n_stmts++; 3462 } 3463 3464 if (n_stmts <= vex_control.iropt_unroll_thresh/8) { 3465 if (vex_control.iropt_verbosity > 0) 3466 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n", 3467 n_stmts, 8* n_stmts); 3468 return 8; 3469 } 3470 if (n_stmts <= vex_control.iropt_unroll_thresh/4) { 3471 if (vex_control.iropt_verbosity > 0) 3472 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n", 3473 n_stmts, 4* n_stmts); 3474 return 4; 3475 } 3476 3477 if (n_stmts <= vex_control.iropt_unroll_thresh/2) { 3478 if (vex_control.iropt_verbosity > 0) 3479 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n", 3480 n_stmts, 2* n_stmts); 3481 return 2; 3482 } 3483 3484 if (vex_control.iropt_verbosity > 0) 3485 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts); 3486 3487 return 1; 3488 } 3489 3490 3491 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr ) 3492 { 3493 Int i, j, jmax, n_vars; 3494 Bool xxx_known; 3495 Addr64 xxx_value, yyy_value; 3496 IRExpr* udst; 3497 IRStmt* st; 3498 IRConst* con; 3499 IRSB *bb1, *bb2; 3500 Int unroll_factor; 3501 3502 if (vex_control.iropt_unroll_thresh <= 0) 3503 return NULL; 3504 3505 /* First off, figure out if we can unroll this loop. Do this 3506 without modifying bb0. */ 3507 3508 if (bb0->jumpkind != Ijk_Boring) 3509 return NULL; 3510 3511 xxx_known = False; 3512 xxx_value = 0; 3513 3514 /* Extract the next-guest address. If it isn't a literal, we 3515 have to give up. */ 3516 3517 udst = bb0->next; 3518 if (udst->tag == Iex_Const 3519 && (udst->Iex.Const.con->tag == Ico_U32 3520 || udst->Iex.Const.con->tag == Ico_U64)) { 3521 /* The BB ends in a jump to a literal location. */ 3522 xxx_known = True; 3523 xxx_value = udst->Iex.Const.con->tag == Ico_U64 3524 ? udst->Iex.Const.con->Ico.U64 3525 : (Addr64)(udst->Iex.Const.con->Ico.U32); 3526 } 3527 3528 if (!xxx_known) 3529 return NULL; 3530 3531 /* Now we know the BB ends to a jump to a literal location. If 3532 it's a jump to itself (viz, idiom #1), move directly to the 3533 unrolling stage, first cloning the bb so the original isn't 3534 modified. */ 3535 if (xxx_value == my_addr) { 3536 unroll_factor = calc_unroll_factor( bb0 ); 3537 if (unroll_factor < 2) 3538 return NULL; 3539 bb1 = deepCopyIRSB( bb0 ); 3540 bb0 = NULL; 3541 udst = NULL; /* is now invalid */ 3542 goto do_unroll; 3543 } 3544 3545 /* Search for the second idiomatic form: 3546 X: BODY; if (c) goto X; goto Y 3547 We know Y, but need to establish that the last stmt 3548 is 'if (c) goto X'. 3549 */ 3550 yyy_value = xxx_value; 3551 for (i = bb0->stmts_used-1; i >= 0; i--) 3552 if (bb0->stmts[i]) 3553 break; 3554 3555 if (i < 0) 3556 return NULL; /* block with no stmts. Strange. */ 3557 3558 st = bb0->stmts[i]; 3559 if (st->tag != Ist_Exit) 3560 return NULL; 3561 if (st->Ist.Exit.jk != Ijk_Boring) 3562 return NULL; 3563 3564 con = st->Ist.Exit.dst; 3565 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 3566 3567 xxx_value = con->tag == Ico_U64 3568 ? st->Ist.Exit.dst->Ico.U64 3569 : (Addr64)(st->Ist.Exit.dst->Ico.U32); 3570 3571 /* If this assertion fails, we have some kind of type error. */ 3572 vassert(con->tag == udst->Iex.Const.con->tag); 3573 3574 if (xxx_value != my_addr) 3575 /* We didn't find either idiom. Give up. */ 3576 return NULL; 3577 3578 /* Ok, we found idiom #2. 
Copy the BB, switch around the xxx and 3579 yyy values (which makes it look like idiom #1), and go into 3580 unrolling proper. This means finding (again) the last stmt, in 3581 the copied BB. */ 3582 3583 unroll_factor = calc_unroll_factor( bb0 ); 3584 if (unroll_factor < 2) 3585 return NULL; 3586 3587 bb1 = deepCopyIRSB( bb0 ); 3588 bb0 = NULL; 3589 udst = NULL; /* is now invalid */ 3590 for (i = bb1->stmts_used-1; i >= 0; i--) 3591 if (bb1->stmts[i]) 3592 break; 3593 3594 /* The next bunch of assertions should be true since we already 3595 found and checked the last stmt in the original bb. */ 3596 3597 vassert(i >= 0); 3598 3599 st = bb1->stmts[i]; 3600 vassert(st->tag == Ist_Exit); 3601 3602 con = st->Ist.Exit.dst; 3603 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 3604 3605 udst = bb1->next; 3606 vassert(udst->tag == Iex_Const); 3607 vassert(udst->Iex.Const.con->tag == Ico_U32 3608 || udst->Iex.Const.con->tag == Ico_U64); 3609 vassert(con->tag == udst->Iex.Const.con->tag); 3610 3611 /* switch the xxx and yyy fields around */ 3612 if (con->tag == Ico_U64) { 3613 udst->Iex.Const.con->Ico.U64 = xxx_value; 3614 con->Ico.U64 = yyy_value; 3615 } else { 3616 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value; 3617 con->Ico.U32 = (UInt)yyy_value; 3618 } 3619 3620 /* negate the test condition */ 3621 st->Ist.Exit.guard 3622 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard)); 3623 3624 /* --- The unroller proper. Both idioms have --- */ 3625 /* --- by now been converted to idiom 1. --- */ 3626 3627 do_unroll: 3628 3629 vassert(unroll_factor == 2 3630 || unroll_factor == 4 3631 || unroll_factor == 8); 3632 3633 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1); 3634 for (j = 1; j <= jmax; j++) { 3635 3636 n_vars = bb1->tyenv->types_used; 3637 3638 bb2 = deepCopyIRSB(bb1); 3639 for (i = 0; i < n_vars; i++) 3640 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]); 3641 3642 for (i = 0; i < bb2->stmts_used; i++) { 3643 /* deltaIRStmt destructively modifies the stmt, but 3644 that's OK since bb2 is a complete fresh copy of bb1. */ 3645 deltaIRStmt(bb2->stmts[i], n_vars); 3646 addStmtToIRSB(bb1, bb2->stmts[i]); 3647 } 3648 } 3649 3650 if (DEBUG_IROPT) { 3651 vex_printf("\nUNROLLED (%llx)\n", my_addr); 3652 ppIRSB(bb1); 3653 vex_printf("\n"); 3654 } 3655 3656 /* Flattening; sigh. The unroller succeeds in breaking flatness 3657 by negating the test condition. This should be fixed properly. 3658 For the moment use this shotgun approach. */ 3659 return flatten_BB(bb1); 3660 } 3661 3662 3663 /*---------------------------------------------------------------*/ 3664 /*--- The tree builder ---*/ 3665 /*---------------------------------------------------------------*/ 3666 3667 /* This isn't part of IR optimisation. Really it's a pass done prior 3668 to instruction selection, which improves the code that the 3669 instruction selector can produce. */ 3670 3671 /* --- The 'tmp' environment is the central data structure here --- */ 3672 3673 /* The number of outstanding bindings we're prepared to track. 3674 The number of times the env becomes full and we have to dump 3675 the oldest binding (hence reducing code quality) falls very 3676 rapidly as the env size increases. 8 gives reasonable performance 3677 under most circumstances.
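   The point of the pass, sketched (temp numbers and offsets
   hypothetical): given single-use bindings such as

      t1 = GET:I32(16)
      t2 = Add32(t1,0x4:I32)
      PUT(0) = t2

   the bindings are carried along in the env and substituted into
   their sole use points, rebuilding the tree

      PUT(0) = Add32(GET:I32(16),0x4:I32)

   which the instruction selector can often match with a single
   instruction.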
*/ 3678 #define A_NENV 10 3679 3680 /* bindee == NULL === slot is not in use 3681 bindee != NULL === slot is in use 3682 */ 3683 typedef 3684 struct { 3685 IRTemp binder; 3686 IRExpr* bindee; 3687 Bool doesLoad; 3688 Bool doesGet; 3689 } 3690 ATmpInfo; 3691 3692 __attribute__((unused)) 3693 static void ppAEnv ( ATmpInfo* env ) 3694 { 3695 Int i; 3696 for (i = 0; i < A_NENV; i++) { 3697 vex_printf("%d tmp %d val ", i, (Int)env[i].binder); 3698 if (env[i].bindee) 3699 ppIRExpr(env[i].bindee); 3700 else 3701 vex_printf("(null)"); 3702 vex_printf("\n"); 3703 } 3704 } 3705 3706 /* --- Tree-traversal fns --- */ 3707 3708 /* Traverse an expr, and detect if any part of it reads memory or does 3709 a Get. Be careful ... this really controls how much the 3710 tree-builder can reorder the code, so getting it right is critical. 3711 */ 3712 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e ) 3713 { 3714 Int i; 3715 switch (e->tag) { 3716 case Iex_CCall: 3717 for (i = 0; e->Iex.CCall.args[i]; i++) 3718 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]); 3719 return; 3720 case Iex_Mux0X: 3721 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond); 3722 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0); 3723 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX); 3724 return; 3725 case Iex_Qop: 3726 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1); 3727 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2); 3728 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3); 3729 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4); 3730 return; 3731 case Iex_Triop: 3732 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1); 3733 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2); 3734 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3); 3735 return; 3736 case Iex_Binop: 3737 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1); 3738 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2); 3739 return; 3740 case Iex_Unop: 3741 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg); 3742 return; 3743 case Iex_Load: 3744 *doesLoad = True; 3745 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr); 3746 return; 3747 case Iex_Get: 3748 *doesGet = True; 3749 return; 3750 case Iex_GetI: 3751 *doesGet = True; 3752 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix); 3753 return; 3754 case Iex_RdTmp: 3755 case Iex_Const: 3756 return; 3757 default: 3758 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3759 vpanic("setHints_Expr"); 3760 } 3761 } 3762 3763 3764 /* Add a binding to the front of the env and slide all the rest 3765 backwards. It should be the case that the last slot is free. */ 3766 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee ) 3767 { 3768 Int i; 3769 vassert(env[A_NENV-1].bindee == NULL); 3770 for (i = A_NENV-1; i >= 1; i--) 3771 env[i] = env[i-1]; 3772 env[0].binder = binder; 3773 env[0].bindee = bindee; 3774 env[0].doesLoad = False; /* filled in later */ 3775 env[0].doesGet = False; /* filled in later */ 3776 } 3777 3778 /* Given uses :: array of UShort, indexed by IRTemp 3779 Add the use-occurrences of temps in this expression 3780 to the env. 
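   (The counts land in 'uses' itself; for example, scanning
   Add32(t7,t7) bumps uses[t7] by 2.)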
3781 */ 3782 static void aoccCount_Expr ( UShort* uses, IRExpr* e ) 3783 { 3784 Int i; 3785 3786 switch (e->tag) { 3787 3788 case Iex_RdTmp: /* the only interesting case */ 3789 uses[e->Iex.RdTmp.tmp]++; 3790 return; 3791 3792 case Iex_Mux0X: 3793 aoccCount_Expr(uses, e->Iex.Mux0X.cond); 3794 aoccCount_Expr(uses, e->Iex.Mux0X.expr0); 3795 aoccCount_Expr(uses, e->Iex.Mux0X.exprX); 3796 return; 3797 3798 case Iex_Qop: 3799 aoccCount_Expr(uses, e->Iex.Qop.arg1); 3800 aoccCount_Expr(uses, e->Iex.Qop.arg2); 3801 aoccCount_Expr(uses, e->Iex.Qop.arg3); 3802 aoccCount_Expr(uses, e->Iex.Qop.arg4); 3803 return; 3804 3805 case Iex_Triop: 3806 aoccCount_Expr(uses, e->Iex.Triop.arg1); 3807 aoccCount_Expr(uses, e->Iex.Triop.arg2); 3808 aoccCount_Expr(uses, e->Iex.Triop.arg3); 3809 return; 3810 3811 case Iex_Binop: 3812 aoccCount_Expr(uses, e->Iex.Binop.arg1); 3813 aoccCount_Expr(uses, e->Iex.Binop.arg2); 3814 return; 3815 3816 case Iex_Unop: 3817 aoccCount_Expr(uses, e->Iex.Unop.arg); 3818 return; 3819 3820 case Iex_Load: 3821 aoccCount_Expr(uses, e->Iex.Load.addr); 3822 return; 3823 3824 case Iex_CCall: 3825 for (i = 0; e->Iex.CCall.args[i]; i++) 3826 aoccCount_Expr(uses, e->Iex.CCall.args[i]); 3827 return; 3828 3829 case Iex_GetI: 3830 aoccCount_Expr(uses, e->Iex.GetI.ix); 3831 return; 3832 3833 case Iex_Const: 3834 case Iex_Get: 3835 return; 3836 3837 default: 3838 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 3839 vpanic("aoccCount_Expr"); 3840 } 3841 } 3842 3843 3844 /* Given uses :: array of UShort, indexed by IRTemp 3845 Add the use-occurrences of temps in this statement 3846 to the env. 3847 */ 3848 static void aoccCount_Stmt ( UShort* uses, IRStmt* st ) 3849 { 3850 Int i; 3851 IRDirty* d; 3852 IRCAS* cas; 3853 switch (st->tag) { 3854 case Ist_AbiHint: 3855 aoccCount_Expr(uses, st->Ist.AbiHint.base); 3856 aoccCount_Expr(uses, st->Ist.AbiHint.nia); 3857 return; 3858 case Ist_WrTmp: 3859 aoccCount_Expr(uses, st->Ist.WrTmp.data); 3860 return; 3861 case Ist_Put: 3862 aoccCount_Expr(uses, st->Ist.Put.data); 3863 return; 3864 case Ist_PutI: 3865 aoccCount_Expr(uses, st->Ist.PutI.ix); 3866 aoccCount_Expr(uses, st->Ist.PutI.data); 3867 return; 3868 case Ist_Store: 3869 aoccCount_Expr(uses, st->Ist.Store.addr); 3870 aoccCount_Expr(uses, st->Ist.Store.data); 3871 return; 3872 case Ist_CAS: 3873 cas = st->Ist.CAS.details; 3874 aoccCount_Expr(uses, cas->addr); 3875 if (cas->expdHi) 3876 aoccCount_Expr(uses, cas->expdHi); 3877 aoccCount_Expr(uses, cas->expdLo); 3878 if (cas->dataHi) 3879 aoccCount_Expr(uses, cas->dataHi); 3880 aoccCount_Expr(uses, cas->dataLo); 3881 return; 3882 case Ist_LLSC: 3883 aoccCount_Expr(uses, st->Ist.LLSC.addr); 3884 if (st->Ist.LLSC.storedata) 3885 aoccCount_Expr(uses, st->Ist.LLSC.storedata); 3886 return; 3887 case Ist_Dirty: 3888 d = st->Ist.Dirty.details; 3889 if (d->mFx != Ifx_None) 3890 aoccCount_Expr(uses, d->mAddr); 3891 aoccCount_Expr(uses, d->guard); 3892 for (i = 0; d->args[i]; i++) 3893 aoccCount_Expr(uses, d->args[i]); 3894 return; 3895 case Ist_NoOp: 3896 case Ist_IMark: 3897 case Ist_MBE: 3898 return; 3899 case Ist_Exit: 3900 aoccCount_Expr(uses, st->Ist.Exit.guard); 3901 return; 3902 default: 3903 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 3904 vpanic("aoccCount_Stmt"); 3905 } 3906 } 3907 3908 /* Look up a binding for tmp in the env. If found, return the bound 3909 expression, and set the env's binding to NULL so it is marked as 3910 used. If not found, return NULL. 
*/ 3911 3912 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp ) 3913 { 3914 Int i; 3915 for (i = 0; i < A_NENV; i++) { 3916 if (env[i].binder == tmp && env[i].bindee != NULL) { 3917 IRExpr* bindee = env[i].bindee; 3918 env[i].bindee = NULL; 3919 return bindee; 3920 } 3921 } 3922 return NULL; 3923 } 3924 3925 /* Traverse e, looking for temps. For each observed temp, see if env 3926 contains a binding for the temp, and if so return the bound value. 3927 The env has the property that any binding it holds is 3928 'single-shot', so once a binding is used, it is marked as no longer 3929 available, by setting its .bindee field to NULL. */ 3930 3931 static inline Bool is_Unop ( IRExpr* e, IROp op ) { 3932 return e->tag == Iex_Unop && e->Iex.Unop.op == op; 3933 } 3934 static inline Bool is_Binop ( IRExpr* e, IROp op ) { 3935 return e->tag == Iex_Binop && e->Iex.Binop.op == op; 3936 } 3937 3938 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 3939 { 3940 switch (op) { 3941 case Iop_Or32: 3942 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ 3943 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) 3944 return IRExpr_Unop( Iop_CmpwNEZ32, 3945 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 3946 a2->Iex.Unop.arg ) ); 3947 break; 3948 default: 3949 break; 3950 } 3951 /* no reduction rule applies */ 3952 return IRExpr_Binop( op, a1, a2 ); 3953 } 3954 3955 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) 3956 { 3957 switch (op) { 3958 case Iop_CmpwNEZ64: 3959 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 3960 if (is_Binop(aa, Iop_Or64) 3961 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) 3962 return fold_IRExpr_Unop( 3963 Iop_CmpwNEZ64, 3964 IRExpr_Binop(Iop_Or64, 3965 aa->Iex.Binop.arg1->Iex.Unop.arg, 3966 aa->Iex.Binop.arg2)); 3967 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 3968 if (is_Binop(aa, Iop_Or64) 3969 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) 3970 return fold_IRExpr_Unop( 3971 Iop_CmpwNEZ64, 3972 IRExpr_Binop(Iop_Or64, 3973 aa->Iex.Binop.arg1, 3974 aa->Iex.Binop.arg2->Iex.Unop.arg)); 3975 break; 3976 case Iop_CmpNEZ64: 3977 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ 3978 if (is_Unop(aa, Iop_Left64)) 3979 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); 3980 break; 3981 case Iop_CmpwNEZ32: 3982 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ 3983 if (is_Unop(aa, Iop_CmpwNEZ32)) 3984 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); 3985 break; 3986 case Iop_CmpNEZ32: 3987 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ 3988 if (is_Unop(aa, Iop_Left32)) 3989 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); 3990 break; 3991 case Iop_Left32: 3992 /* Left32( Left32(x) ) --> Left32(x) */ 3993 if (is_Unop(aa, Iop_Left32)) 3994 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); 3995 break; 3996 case Iop_32to1: 3997 /* 32to1( 1Uto32 ( x ) ) --> x */ 3998 if (is_Unop(aa, Iop_1Uto32)) 3999 return aa->Iex.Unop.arg; 4000 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ 4001 if (is_Unop(aa, Iop_CmpwNEZ32)) 4002 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg ); 4003 break; 4004 case Iop_64to1: 4005 /* 64to1( 1Uto64 ( x ) ) --> x */ 4006 if (is_Unop(aa, Iop_1Uto64)) 4007 return aa->Iex.Unop.arg; 4008 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ 4009 if (is_Unop(aa, Iop_CmpwNEZ64)) 4010 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); 4011 break; 4012 case Iop_64to32: 4013 /* 64to32( 32Uto64 ( x )) --> x */ 4014 if (is_Unop(aa, Iop_32Uto64)) 4015 return 
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}

/* Traverse e, looking for temps.  For each observed temp, see if env
   contains a binding for the temp, and if so return the bound value.
   The env has the property that any binding it holds is
   'single-shot', so once a binding is used, it is marked as no
   longer available, by setting its .bindee field to NULL. */

static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env, args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_Mux0X:
         return IRExpr_Mux0X(
                   atbSubst_Expr(env, e->Iex.Mux0X.cond),
                   atbSubst_Expr(env, e->Iex.Mux0X.expr0),
                   atbSubst_Expr(env, e->Iex.Mux0X.exprX)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.op,
                   atbSubst_Expr(env, e->Iex.Qop.arg1),
                   atbSubst_Expr(env, e->Iex.Qop.arg2),
                   atbSubst_Expr(env, e->Iex.Qop.arg3),
                   atbSubst_Expr(env, e->Iex.Qop.arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.op,
                   atbSubst_Expr(env, e->Iex.Triop.arg1),
                   atbSubst_Expr(env, e->Iex.Triop.arg2),
                   atbSubst_Expr(env, e->Iex.Triop.arg3)
                );
      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
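
/* Single-shot substitution, by way of a sketch: if env holds the
   binding t1 -> Add32(t2,t3), then atbSubst_Expr applied to
   Mul32(t1,t4) yields Mul32(Add32(t2,t3),t4) and marks the binding
   as used (bindee set to NULL), so a second occurrence of t1 would
   not be substituted.  That is safe because bindings are only ever
   created for temps whose use count is exactly 1. */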
/* Same deal as atbSubst_Expr, except for stmts. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   atbSubst_Expr(env, st->Ist.PutI.ix),
                   st->Ist.PutI.bias,
                   atbSubst_Expr(env, st->Ist.PutI.data)
                );

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = atbSubst_Expr(env, d2->args[i]);
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}

/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
{
   Int      i, j, k, m;
   Bool     stmtPuts, stmtStores, invalidateMe;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Int     n_tmps = bb->tyenv->types_used;
   UShort* uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr( uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
         continue;
      ppIRTemp( (IRTemp)i );
      vex_printf(" used %d\n", (Int)uses[i] );
   }
#  endif
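
   /* Worked example (illustrative only): with
      uses[t1] == uses[t2] == uses[t3] == 1, tree building turns

         t1 = GET:I32(0)
         t2 = GET:I32(4)
         t3 = Add32(t1,t2)
         PUT(8) = t3

      into the single statement

         PUT(8) = Add32( GET:I32(0), GET:I32(4) )

      The three single-use bindings are parked in env and consumed
      when the Put is emitted, so the instruction selector sees one
      expression tree rather than four flat statements. */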

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end (oldest) element.  This
         guarantees there is at least one free slot in the following.

         On seeing 't = E' with occ(t) == 1,
            let E' = env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
            (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't = E' binds invalidated by stmt
               emit the invalidated stmts
            emit stmt'
            compact any holes in env
               by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be
      that for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E' with occ(t) == 1.  Do the
            abovementioned actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* stmtPuts/stmtStores characterise what the stmt under
         consideration does, or might do (over-approximating with
         True is always safe). */
      stmtPuts
         = toBool( st->tag == Ist_Put
                   || st->tag == Ist_PutI
                   || st->tag == Ist_Dirty );

      /* stmtStores is True if this stmt writes memory, or might do
         so (==> we don't want to reorder other loads or stores
         relative to it).  Also, both LL and SC fall under this
         classification, since we really ought to be conservative and
         not reorder any other memory transactions relative to
         them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || st->tag == Ist_Dirty
                   || st->tag == Ist_LLSC );
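
      /* Invalidation sketch (informal): if env holds a binding
         t1 -> LDle:I32(t9) -- so its entry has doesLoad == True --
         and the statement now being emitted is STle(t8) = t7, then
         stmtStores is True and the binding is flushed below as
         "t1 = LDle:I32(t9)" before the store, preserving the
         original load/store ordering. */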
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data */
              || (env[k].doesGet && stmtPuts)
              /* a put invalidates loaded data.  Note, we could do
                 much better here in the sense that we only need to
                 invalidate trees containing loads if the Put in
                 question is marked as requiring precise
                 exceptions. */
              || (env[k].doesLoad && stmtPuts)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence, lock, unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      for (; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible,
      and dump any left-over bindings.  Hmm.  Perhaps there should be
      no left-over bindings?  Or any left-over bindings are by
      definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;
}


/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/


static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int)
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   redundant_put_removal_BB ( bb, preciseMemExnsFn );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd\n\n" );
      ppIRSB(bb);
   }

   return bb;
}
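
/* By way of an informal example (assuming the usual behaviour of the
   redundant-Get pass, which tracks the most recent Put at each guest
   state offset): redundant-Get removal turns

      PUT(0) = t1
      t2 = GET:I32(0)

   into "t2 = t1", and the subsequent constant-propagation and
   dead-code passes then clean up whatever that exposes. */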
/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and
   PutI. */

static
IRSB* expensive_transformations( IRSB* bb )
{
   (void)do_cse_BB( bb );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   do_redundant_PutI_elimination( bb );
   do_deadcode_BB( bb );
   return bb;
}


/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F32: case Ity_F64: case Ity_V128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++)
               vassert(isIRAtom(d->args[j]));
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
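
/* For example (a sketch; 'e2' and 'newArg' are hypothetical names
   used only for this illustration): to replace one argument of a
   Binop, rebuild the node rather than mutating it in place:

      // WRONG: e->Iex.Binop.arg1 = newArg;   mutates a possibly
      //                                      shared tree
      e2 = IRExpr_Binop( e->Iex.Binop.op,
                         newArg, e->Iex.Binop.arg2 );   // right
*/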

IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      redundant_put_removal_BB ( bb, preciseMemExnsFn );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to
         throw at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If there is any evidence of FP or Vector activity, CSE,
            as that tends to mop up all manner of lardy code to do
            with rounding modes.  Don't bother if hasGetIorPutI,
            since that case leads into the expensive transformations,
            which do CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb );
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                            ir_opt.c ---*/
/*---------------------------------------------------------------*/