/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for sameIRExprs. */
#define STATS_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess:
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

     If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as minimal, but all the guest state is up to date at memory
     exception points.

     If iropt_register_updates == VexRegUpdAllregsAtEachInsn:
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.
   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore in a sense, wrong.  In the sense that PutIs are
   assumed not to write parts of the guest state that we need to have
   up-to-date at loads/stores.  So far on x86 guest that has not
   mattered since indeed only the x87 FP registers and tags are
   accessed using GetI/PutI, and there is no need so far for them to
   be up to date at mem exception points.  The rPutI pass should be
   fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is)

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive.  */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
   h->size   = 8;
   h->used   = 0;
   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
   return h;
}
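
/* For orientation, a minimal usage sketch of the map operations
   defined below (illustrative only, not compiled): lookupHHW and
   addToHHW are the only entry points, and bindings are found by
   exact key match. */
#if 0
   HashHW* h = newHHW();
   HWord   v = 0;
   addToHHW(h, /*key*/0x1234, /*val*/0x5678);
   vassert(lookupHHW(h, &v, 0x1234) && v == 0x5678);
   addToHHW(h, 0x1234, 0x9ABC);             /* replaces the binding */
   vassert(lookupHHW(h, &v, 0x1234) && v == 0x9ABC);
   vassert(!lookupHHW(h, NULL, 0x4321));    /* absent key */
#endif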
/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}
/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
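
/* A worked example of what flatten_Expr does (illustrative only):
   flattening the single statement

      t0 = Add32(GET:I32(8), LDle:I32(GET:I32(12)))

   appends a fresh temp for each non-atomic subexpression, giving

      t1 = GET:I32(8)
      t2 = GET:I32(12)
      t3 = LDle:I32(t2)
      t0 = Add32(t1,t3)

   so that afterwards every operator, load and call takes only atoms
   (RdTmp or Const) as arguments. */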
/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
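
/* Worked example of the key encoding (illustrative only): a Put of an
   I64 at offset 16 covers guest state bytes 16 .. 23, so
   mk_key_GetPut(16, Ity_I64) yields (16 << 16) | 23 == 0x100017.
   Two keys denote overlapping state exactly when the byte ranges they
   encode overlap, which is what invalidateOverlaps below tests. */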
/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi). */

static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
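
/* E.g. an entry for bytes 16 .. 23 survives invalidateOverlaps(h, 24, 27),
   since the ranges are disjoint (23 < 24), but is killed by
   invalidateOverlaps(h, 20, 27), since (16..23) and (20..27) share
   bytes 20 .. 23. */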
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
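
/* For illustration, on a flattened block this pass rewrites

      t1 = GET:I32(0)          t1 = GET:I32(0)
      t2 = GET:I32(0)    ==>   t2 = t1
      PUT(0) = t3              PUT(0) = t3
      t4 = GET:I32(0)          t4 = t3

   the second Get is served from the binding created by the first,
   and after the Put the stored atom t3 itself stands in for the
   slot. */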
/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (vex_control.iropt_register_updates) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi ))
                  env->inuse[j] = False;
            }
            break;
         default:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // Nor can any other rubbish value.
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
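
/* For illustration, scanning the statements

      PUT(60) = t1
      t2 = GET:I32(0)
      PUT(60) = t3

   backwards: the second Put enters (60..63) into the set; the Get
   touches only (0..3), so the set survives; the first Put then finds
   (60..63) already present and is removed, since the value it writes
   is certain to be overwritten before it can be observed. */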
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert
      (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
       || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */
/* Do both expressions compute the same value?  The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do.  The reason is that we do not
   keep track of changes in the guest state and memory.  Thus, two
   Get's, GetI's or Load's, even when accessing the same location, will
   be assumed to compute different values.  After all the accesses may
   happen at different times and the guest state / memory can have
   changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_Mux0X:
         return toBool(    sameIRExprs_aux( env, e1->Iex.Mux0X.cond,
                                                 e2->Iex.Mux0X.cond )
                        && sameIRExprs_aux( env, e1->Iex.Mux0X.expr0,
                                                 e2->Iex.Mux0X.expr0 )
                        && sameIRExprs_aux( env, e1->Iex.Mux0X.exprX,
                                                 e2->Iex.Mux0X.exprX ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
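
/* For illustration: with env binding t10 -> Add32(t1,t2) and
   t11 -> Add32(t1,t2), sameIRExprs(env, RdTmp(t10), RdTmp(t11))
   recurses through the two bindings and answers True, whereas two
   syntactically identical Get's would (conservatively) compare as
   different. */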
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-verbosity > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U32(1---1)) ? */
static Bool isOnesU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0xFFFFFFFF );
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:  return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
      default: vpanic("isZeroU");
   }
}
/* Is this an integer constant with value 1---1b ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
                                   == 0xFFFFFFFF);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
                                   == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
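
/* E.g. fold_Clz32(0x00010000) scans from bit 31 downwards, first finds
   a set bit at bit 16 (iteration i == 15), and so returns 15 -- the
   number of leading zero bits. */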
/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   switch (e->tag) {
      case Iex_Unop:
         /* UNARY ops */
         if (e->Iex.Unop.arg->tag == Iex_Const) {
            switch (e->Iex.Unop.op) {
               case Iop_1Uto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0)));
                  break;
               case Iop_1Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;
               case Iop_1Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;

               case Iop_1Sto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFF : 0)));
                  break;
               case Iop_1Sto16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFF : 0)));
                  break;
               case Iop_1Sto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFF : 0));
                  break;
               case Iop_1Sto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFFFFFFFFFFULL : 0));
                  break;

               case Iop_8Sto32: {
                  /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  s32 <<= 24;
                  s32 >>= 24;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  break;
               }
               case Iop_16Sto32: {
                  /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  s32 <<= 16;
                  s32 >>= 16;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
                  break;
               }
               case Iop_8Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_8Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_8Sto16: {
                  /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  s16 <<= 8;
                  s16 >>= 8;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
                  break;
               }
               case Iop_8Uto16:
                  e2 = IRExpr_Const(IRConst_U16(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_32to16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_64to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                       )));
                  break;

               case Iop_NotV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
                  break;
               case Iop_Not64:
                  e2 = IRExpr_Const(IRConst_U64(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_Not32:
                  e2 = IRExpr_Const(IRConst_U32(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Not16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Not8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
                  break;

               case Iop_Not1:
                  e2 = IRExpr_Const(IRConst_U1(
                          notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
                  break;

               case Iop_64to8: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFULL;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
                  break;
               }
               case Iop_64to16: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFFFULL;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
                  break;
               }
               case Iop_64to32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0x00000000FFFFFFFFULL;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_64HIto32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_32Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
                  break;
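
               /* The shift-pair idiom in the Sto cases above and below
                  sign-extends within the destination width: e.g. for
                  16Sto64, 0x8000 << 48 == 0x8000000000000000, and the
                  signed >> 48 then yields 0xFFFFFFFFFFFF8000. */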
               case Iop_16Sto64: {
                  /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  s64 <<= 48;
                  s64 >>= 48;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  break;
               }
               case Iop_32Sto64: {
                  /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  s64 <<= 32;
                  s64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  break;
               }

               case Iop_16to8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }
               case Iop_16HIto8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 >>= 8;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }

               case Iop_CmpNEZ8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                       )));
                  break;
               case Iop_CmpNEZ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_CmpNEZ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                       )));
                  break;

               case Iop_CmpwNEZ32: {
                  UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (w32 == 0)
                     e2 = IRExpr_Const(IRConst_U32( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
                  break;
               }
               case Iop_CmpwNEZ64: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (w64 == 0)
                     e2 = IRExpr_Const(IRConst_U64( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
                  break;
               }

               case Iop_Left32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  Int  s32 = (Int)(u32 & 0xFFFFFFFF);
                  s32 = (s32 | (-s32));
                  e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
                  break;
               }

               case Iop_Left64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  Long  s64 = (Long)u64;
                  s64 = (s64 | (-s64));
                  e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
                  break;
               }

               case Iop_Clz32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (u32 != 0)
                     e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
                  break;
               }
               case Iop_Clz64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (u64 != 0ULL)
                     e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
                  break;
               }

               /* For these vector ones, can't fold all cases, but at least
                  do the most obvious one.  Could do better here using
                  summarise/desummarise of vector constants, but too
                  difficult to verify; hence just handle the zero cases. */
               case Iop_32UtoV128: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (0 == u32) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128to64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 0) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128HIto64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 8) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_64UtoV128: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (0 == u64) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
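
               /* Reminder on the Ico_V128 representation used above:
                  the 16-bit value is a summary, one bit per byte lane,
                  where bit i set means byte i of the 128-bit value is
                  0xFF and clear means 0x00.  Hence (v128 & 0xFF) == 0
                  certifies that the low 64 bits are all zero, and
                  V128(0xFFFF) denotes the all-ones vector. */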
               /* Even stupider (although still correct ..) */
               case Iop_V256to64_0: case Iop_V256to64_1:
               case Iop_V256to64_2: case Iop_V256to64_3: {
                  UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
                  if (v256 == 0x00000000) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               default:
                  goto unhandled;
            }
         }
         break;

      case Iex_Binop:
         /* BINARY ops */
         if (e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg2->tag == Iex_Const) {
            /* cases where both args are consts */
            switch (e->Iex.Binop.op) {

               /* -- Or -- */
               case Iop_Or8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Or16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Or32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Or64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_OrV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Xor -- */
               case Iop_Xor8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Xor16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Xor32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Xor64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_XorV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- And -- */
               case Iop_And8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_And16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_And32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_And64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_AndV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Add -- */
               case Iop_Add8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Add32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Add64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Sub -- */
               case Iop_Sub8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Sub32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Sub64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Max32U -- */
               case Iop_Max32U: {
                  UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  UInt res  = u32a > u32b ? u32a : u32b;
                  e2 = IRExpr_Const(IRConst_U32(res));
                  break;
               }

               /* -- Mul -- */
               case Iop_Mul32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Mul64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               case Iop_MullS32: {
                  /* very paranoid */
                  UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  Int   s32a = (Int)u32a;
                  Int   s32b = (Int)u32b;
                  Long  s64a = (Long)s32a;
                  Long  s64b = (Long)s32b;
                  Long  sres = s64a * s64b;
                  ULong ures = (ULong)sres;
                  e2 = IRExpr_Const(IRConst_U64(ures));
                  break;
               }

               /* -- Shl -- */
               case Iop_Shl32:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31)
                     e2 = IRExpr_Const(IRConst_U32(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                              << shift)));
                  break;
               case Iop_Shl64:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63)
                     e2 = IRExpr_Const(IRConst_U64(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                              << shift)));
                  break;

               /* -- Sar -- */
               case Iop_Sar32: {
                  /* paranoid ... */
                  /*signed*/ Int s32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     s32 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  }
                  break;
               }
               case Iop_Sar64: {
                  /* paranoid ... */
                  /*signed*/ Long s64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     s64 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  }
                  break;
               }
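
               /* E.g. Sar32(0x80000000:I32, 4:I8) folds to 0xF8000000
                  (the arithmetic shift replicates the sign bit),
                  whereas Shr32 below on the same arguments would give
                  0x08000000. */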
               /* -- Shr -- */
               case Iop_Shr32: {
                  /* paranoid ... */
                  /*unsigned*/ UInt u32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     u32 >>=/*unsigned*/ shift;
                     e2 = IRExpr_Const(IRConst_U32(u32));
                  }
                  break;
               }
               case Iop_Shr64: {
                  /* paranoid ... */
                  /*unsigned*/ ULong u64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     u64 >>=/*unsigned*/ shift;
                     e2 = IRExpr_Const(IRConst_U64(u64));
                  }
                  break;
               }

               /* -- CmpEQ -- */
               case Iop_CmpEQ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
                  break;
               case Iop_CmpEQ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
                  break;

               /* -- CmpNE -- */
               case Iop_CmpNE8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                           != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
                  break;
               case Iop_CmpNE32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
                  break;
               case Iop_CmpNE64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
                  break;

               /* -- CmpLEU -- */
               case Iop_CmpLE32U:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                           <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                  break;
               case Iop_CmpLE64U:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                           <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
                  break;

               /* -- CmpLES -- */
               case Iop_CmpLE32S:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                           <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                  break;
               case Iop_CmpLE64S:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                           <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
                  break;

               /* -- CmpLTS -- */
               case Iop_CmpLT32S:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                           < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                  break;
               case Iop_CmpLT64S:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                           < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
                  break;

               /* -- CmpLTU -- */
               case Iop_CmpLT32U:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                           < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                  break;
               case Iop_CmpLT64U:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                           < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
                  break;
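
               /* The casts above select the comparison semantics: e.g.
                  CmpLT32S(0xFFFFFFFF:I32, 0:I32) folds to 1, since it
                  compares -1 < 0, while CmpLT32U on the same bits folds
                  to 0 (4294967295 is not below 0). */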
e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1895 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1896 Int s32a = (Int)u32a;
1897 Int s32b = (Int)u32b;
1898 Int r = 0x2; /* EQ */
1899 if (s32a < s32b) {
1900 r = 0x8; /* LT */
1901 }
1902 else if (s32a > s32b) {
1903 r = 0x4; /* GT */
1904 }
1905 e2 = IRExpr_Const(IRConst_U32(r));
1906 break;
1907 }
1908
1909 /* -- nHLto2n -- */
1910 case Iop_32HLto64:
1911 e2 = IRExpr_Const(IRConst_U64(
1912 (((ULong)(e->Iex.Binop.arg1
1913 ->Iex.Const.con->Ico.U32)) << 32)
1914 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1915 ));
1916 break;
1917 case Iop_64HLto128:
1918 /* We can't fold this, because there is no way to
1919 express the result in IR, but at least pretend to
1920 handle it, so as to stop getting blasted with
1921 no-rule-for-this-primop messages. */
1922 break;
1923 /* For this vector one, can't fold all cases, but at
1924 least do the most obvious one. Could do better here
1925 using summarise/desummarise of vector constants, but
1926 too difficult to verify; hence just handle the zero
1927 cases. */
1928 case Iop_64HLtoV128: {
1929 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
1930 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
1931 if (0 == argHi && 0 == argLo) {
1932 e2 = IRExpr_Const(IRConst_V128(0));
1933 } else {
1934 goto unhandled;
1935 }
1936 break;
1937 }
1938
1939 /* -- V128 stuff -- */
1940 case Iop_InterleaveLO8x16: {
1941 /* This turns up a lot in Memcheck instrumentation of
1942 Icc generated code. I don't know why. */
1943 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
1944 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
1945 if (0 == arg1 && 0 == arg2) {
1946 e2 = IRExpr_Const(IRConst_V128(0));
1947 } else {
1948 goto unhandled;
1949 }
1950 break;
1951 }
1952
1953 default:
1954 goto unhandled;
1955 }
1956
1957 } else {
1958
1959 /* other cases (identities, etc) */
1960 switch (e->Iex.Binop.op) {
1961
1962 case Iop_Shl32:
1963 case Iop_Shl64:
1964 case Iop_Shr64:
1965 /* Shl32/Shl64/Shr64(x,0) ==> x */
1966 if (isZeroU(e->Iex.Binop.arg2)) {
1967 e2 = e->Iex.Binop.arg1;
1968 break;
1969 }
1970 /* Shl32/Shl64/Shr64(0,x) ==> 0 */
1971 if (isZeroU(e->Iex.Binop.arg1)) {
1972 e2 = e->Iex.Binop.arg1;
1973 break;
1974 }
1975 break;
1976
1977 case Iop_Shr32:
1978 /* Shr32(x,0) ==> x */
1979 if (isZeroU(e->Iex.Binop.arg2)) {
1980 e2 = e->Iex.Binop.arg1;
1981 break;
1982 }
1983 break;
1984
1985 case Iop_Or8:
1986 case Iop_Or16:
1987 case Iop_Or32:
1988 case Iop_Or64:
1989 case Iop_Max32U:
1990 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
1991 if (isZeroU(e->Iex.Binop.arg2)) {
1992 e2 = e->Iex.Binop.arg1;
1993 break;
1994 }
1995 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
1996 if (isZeroU(e->Iex.Binop.arg1)) {
1997 e2 = e->Iex.Binop.arg2;
1998 break;
1999 }
2000 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2001 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2002 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2003 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2004 break;
2005 }
2006 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2007 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2008 e2 = e->Iex.Binop.arg1;
2009 break;
2010 }
2011 break;
2012
2013 case Iop_Add8:
2014 /* Add8(t,t) ==> t << 1.
2015 Memcheck doesn't understand that
2016 x+x produces a defined least significant bit, and it seems
2017 simplest just to get rid of the problem by rewriting it
2018 out, since the opportunity to do so exists.
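For example (illustrative): 't3 = Add8(t7,t7)' becomes
't3 = Shl8(t7,0x1:I8)', as produced by the rewrite just below.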
*/
2019 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2020 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2021 IRExpr_Const(IRConst_U8(1)));
2022 break;
2023 }
2024 break;
2025
2026 /* NB no Add16(t,t) case yet as no known test case exists */
2027
2028 case Iop_Add32:
2029 case Iop_Add64:
2030 /* Add32/Add64(x,0) ==> x */
2031 if (isZeroU(e->Iex.Binop.arg2)) {
2032 e2 = e->Iex.Binop.arg1;
2033 break;
2034 }
2035 /* Add32/Add64(0,x) ==> x */
2036 if (isZeroU(e->Iex.Binop.arg1)) {
2037 e2 = e->Iex.Binop.arg2;
2038 break;
2039 }
2040 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2041 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2042 e2 = IRExpr_Binop(
2043 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2044 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2045 break;
2046 }
2047 break;
2048
2049 case Iop_Sub64:
2050 /* Sub64(x,0) ==> x */
2051 if (isZeroU64(e->Iex.Binop.arg2)) {
2052 e2 = e->Iex.Binop.arg1;
2053 break;
2054 }
2055 /* Sub64(t,t) ==> 0, for some IRTemp t */
2056 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2057 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2058 break;
2059 }
2060 break;
2061
2062 case Iop_And32:
2063 /* And32(x,0xFFFFFFFF) ==> x */
2064 if (isOnesU32(e->Iex.Binop.arg2)) {
2065 e2 = e->Iex.Binop.arg1;
2066 break;
2067 }
2068 /* And32(x,0) ==> 0 */
2069 if (isZeroU32(e->Iex.Binop.arg2)) {
2070 e2 = e->Iex.Binop.arg2;
2071 break;
2072 }
2073 /* And32(0,x) ==> 0 */
2074 if (isZeroU32(e->Iex.Binop.arg1)) {
2075 e2 = e->Iex.Binop.arg1;
2076 break;
2077 }
2078 /* And32(t,t) ==> t, for some IRTemp t */
2079 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2080 e2 = e->Iex.Binop.arg1;
2081 break;
2082 }
2083 break;
2084
2085 case Iop_And8:
2086 case Iop_And16:
2087 case Iop_And64:
2088 case Iop_AndV128:
2089 case Iop_AndV256:
2090 /* And8/And16/And64/AndV128/AndV256(t,t)
2091 ==> t, for some IRTemp t */
2092 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2093 e2 = e->Iex.Binop.arg1;
2094 break;
2095 }
2096 break;
2097
2098 case Iop_OrV128:
2099 case Iop_OrV256:
2100 /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2101 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2102 e2 = e->Iex.Binop.arg1;
2103 break;
2104 }
2105 break;
2106
2107 case Iop_Xor8:
2108 case Iop_Xor16:
2109 case Iop_Xor32:
2110 case Iop_Xor64:
2111 case Iop_XorV128:
2112 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2113 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2114 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2115 break;
2116 }
2117 break;
2118
2119 case Iop_Sub32:
2120 case Iop_CmpNE32:
2121 /* Sub32/CmpNE32(t,t) ==> 0, for some IRTemp t */
2122 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2123 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2124 break;
2125 }
2126 break;
2127
2128 case Iop_CmpEQ32:
2129 case Iop_CmpEQ64:
2130 case Iop_CmpEQ8x8:
2131 case Iop_CmpEQ8x16:
2132 case Iop_CmpEQ16x8:
2133 case Iop_CmpEQ32x4:
2134 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2135 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2136 break;
2137 }
2138 break;
2139
2140 default:
2141 break;
2142 }
2143 }
2144 break;
2145
2146 case Iex_Mux0X:
2147 /* Mux0X */
2148
2149 /* Is the discriminant a constant?
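If so, exactly one arm is selected: illustratively,
Mux0X(0x0:I8, e0, eX) folds to e0 and Mux0X(0x1:I8, e0, eX) to eX.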
*/
2150 if (e->Iex.Mux0X.cond->tag == Iex_Const) {
2151 Bool zero;
2152 /* assured by the IR type rules */
2153 vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
2154 zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
2155 ->Iex.Const.con->Ico.U8));
2156 e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
2157 }
2158 else
2159 /* are the arms identical? (pretty weedy test) */
2160 if (sameIRExprs(env, e->Iex.Mux0X.expr0,
2161 e->Iex.Mux0X.exprX)) {
2162 e2 = e->Iex.Mux0X.expr0;
2163 }
2164 break;
2165
2166 default:
2167 /* not considered */
2168 break;
2169 }
2170
2171 /* Show cases where we've found but not folded 'op(t,t)'. Be
2172 careful not to call sameIRExprs with values of different types,
2173 though, else it will assert (and so it should!). We can't
2174 conveniently call typeOfIRExpr on the two args without a whole
2175 bunch of extra plumbing to pass in a type env, so just use a
2176 hacky test to check the arguments are not anything that might
2177 cause sameIRExprs to assert. This is only OK because this kludge is
2178 only used for debug printing, not for "real" operation. For
2179 "real" operation (ie, all other calls to sameIRExprs), it is
2180 essential that the two args have the same type.
2181
2182 The "right" solution is to plumb the containing block's
2183 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2184 that's a bunch of extra parameter passing which will just slow
2185 down the normal case, for no purpose. */
2186 if (vex_control.iropt_verbosity > 0
2187 && e == e2
2188 && e->tag == Iex_Binop
2189 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2190 e->Iex.Binop.arg2)
2191 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2192 vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2193 ppIRExpr(e);
2194 vex_printf("\n");
2195 }
2196
2197 /* Show the overall results of folding. */
2198 if (DEBUG_IROPT && e2 != e) {
2199 vex_printf("FOLD: ");
2200 ppIRExpr(e); vex_printf(" -> ");
2201 ppIRExpr(e2); vex_printf("\n");
2202 }
2203
2204 return e2;
2205
2206 unhandled:
2207 # if 0
2208 vex_printf("\n\n");
2209 ppIRExpr(e);
2210 vpanic("fold_Expr: no rule for the above");
2211 # else
2212 if (vex_control.iropt_verbosity > 0) {
2213 vex_printf("vex iropt: fold_Expr: no const rule for: ");
2214 ppIRExpr(e);
2215 vex_printf("\n");
2216 }
2217 return e2;
2218 # endif
2219 }
2220
2221
2222 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2223 1-level due to previous flattening pass.
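For example (illustrative): if env maps t6 to Const(0x20:I32), then
Add32(t6,t7) is rewritten as Add32(0x20:I32,t7); temps not bound in
env are left as-is.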
*/ 2224 2225 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) 2226 { 2227 switch (ex->tag) { 2228 case Iex_RdTmp: 2229 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { 2230 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; 2231 if (rhs->tag == Iex_RdTmp) 2232 return rhs; 2233 if (rhs->tag == Iex_Const 2234 && rhs->Iex.Const.con->tag != Ico_F64i) 2235 return rhs; 2236 } 2237 /* not bound in env */ 2238 return ex; 2239 2240 case Iex_Const: 2241 case Iex_Get: 2242 return ex; 2243 2244 case Iex_GetI: 2245 vassert(isIRAtom(ex->Iex.GetI.ix)); 2246 return IRExpr_GetI( 2247 ex->Iex.GetI.descr, 2248 subst_Expr(env, ex->Iex.GetI.ix), 2249 ex->Iex.GetI.bias 2250 ); 2251 2252 case Iex_Qop: { 2253 IRQop* qop = ex->Iex.Qop.details; 2254 vassert(isIRAtom(qop->arg1)); 2255 vassert(isIRAtom(qop->arg2)); 2256 vassert(isIRAtom(qop->arg3)); 2257 vassert(isIRAtom(qop->arg4)); 2258 return IRExpr_Qop( 2259 qop->op, 2260 subst_Expr(env, qop->arg1), 2261 subst_Expr(env, qop->arg2), 2262 subst_Expr(env, qop->arg3), 2263 subst_Expr(env, qop->arg4) 2264 ); 2265 } 2266 2267 case Iex_Triop: { 2268 IRTriop* triop = ex->Iex.Triop.details; 2269 vassert(isIRAtom(triop->arg1)); 2270 vassert(isIRAtom(triop->arg2)); 2271 vassert(isIRAtom(triop->arg3)); 2272 return IRExpr_Triop( 2273 triop->op, 2274 subst_Expr(env, triop->arg1), 2275 subst_Expr(env, triop->arg2), 2276 subst_Expr(env, triop->arg3) 2277 ); 2278 } 2279 2280 case Iex_Binop: 2281 vassert(isIRAtom(ex->Iex.Binop.arg1)); 2282 vassert(isIRAtom(ex->Iex.Binop.arg2)); 2283 return IRExpr_Binop( 2284 ex->Iex.Binop.op, 2285 subst_Expr(env, ex->Iex.Binop.arg1), 2286 subst_Expr(env, ex->Iex.Binop.arg2) 2287 ); 2288 2289 case Iex_Unop: 2290 vassert(isIRAtom(ex->Iex.Unop.arg)); 2291 return IRExpr_Unop( 2292 ex->Iex.Unop.op, 2293 subst_Expr(env, ex->Iex.Unop.arg) 2294 ); 2295 2296 case Iex_Load: 2297 vassert(isIRAtom(ex->Iex.Load.addr)); 2298 return IRExpr_Load( 2299 ex->Iex.Load.end, 2300 ex->Iex.Load.ty, 2301 subst_Expr(env, ex->Iex.Load.addr) 2302 ); 2303 2304 case Iex_CCall: { 2305 Int i; 2306 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args); 2307 for (i = 0; args2[i]; i++) { 2308 vassert(isIRAtom(args2[i])); 2309 args2[i] = subst_Expr(env, args2[i]); 2310 } 2311 return IRExpr_CCall( 2312 ex->Iex.CCall.cee, 2313 ex->Iex.CCall.retty, 2314 args2 2315 ); 2316 } 2317 2318 case Iex_Mux0X: 2319 vassert(isIRAtom(ex->Iex.Mux0X.cond)); 2320 vassert(isIRAtom(ex->Iex.Mux0X.expr0)); 2321 vassert(isIRAtom(ex->Iex.Mux0X.exprX)); 2322 return IRExpr_Mux0X( 2323 subst_Expr(env, ex->Iex.Mux0X.cond), 2324 subst_Expr(env, ex->Iex.Mux0X.expr0), 2325 subst_Expr(env, ex->Iex.Mux0X.exprX) 2326 ); 2327 2328 default: 2329 vex_printf("\n\n"); ppIRExpr(ex); 2330 vpanic("subst_Expr"); 2331 2332 } 2333 } 2334 2335 2336 /* Apply the subst to stmt, then fold the result as much as possible. 2337 Much simplified due to stmt being previously flattened. As a 2338 result of this, the stmt may wind up being turned into a no-op. 
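For example (illustrative): if env maps t9 to 0x0:I1, an
'if (t9) ...' exit has its guard folded to a constant False and the
whole statement is returned as a NoOp.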
2339 */ 2340 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) 2341 { 2342 # if 0 2343 vex_printf("\nsubst and fold stmt\n"); 2344 ppIRStmt(st); 2345 vex_printf("\n"); 2346 # endif 2347 2348 switch (st->tag) { 2349 case Ist_AbiHint: 2350 vassert(isIRAtom(st->Ist.AbiHint.base)); 2351 vassert(isIRAtom(st->Ist.AbiHint.nia)); 2352 return IRStmt_AbiHint( 2353 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)), 2354 st->Ist.AbiHint.len, 2355 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia)) 2356 ); 2357 case Ist_Put: 2358 vassert(isIRAtom(st->Ist.Put.data)); 2359 return IRStmt_Put( 2360 st->Ist.Put.offset, 2361 fold_Expr(env, subst_Expr(env, st->Ist.Put.data)) 2362 ); 2363 2364 case Ist_PutI: { 2365 IRPutI *puti, *puti2; 2366 puti = st->Ist.PutI.details; 2367 vassert(isIRAtom(puti->ix)); 2368 vassert(isIRAtom(puti->data)); 2369 puti2 = mkIRPutI(puti->descr, 2370 fold_Expr(env, subst_Expr(env, puti->ix)), 2371 puti->bias, 2372 fold_Expr(env, subst_Expr(env, puti->data))); 2373 return IRStmt_PutI(puti2); 2374 } 2375 2376 case Ist_WrTmp: 2377 /* This is the one place where an expr (st->Ist.WrTmp.data) is 2378 allowed to be more than just a constant or a tmp. */ 2379 return IRStmt_WrTmp( 2380 st->Ist.WrTmp.tmp, 2381 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data)) 2382 ); 2383 2384 case Ist_Store: 2385 vassert(isIRAtom(st->Ist.Store.addr)); 2386 vassert(isIRAtom(st->Ist.Store.data)); 2387 return IRStmt_Store( 2388 st->Ist.Store.end, 2389 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)), 2390 fold_Expr(env, subst_Expr(env, st->Ist.Store.data)) 2391 ); 2392 2393 case Ist_CAS: { 2394 IRCAS *cas, *cas2; 2395 cas = st->Ist.CAS.details; 2396 vassert(isIRAtom(cas->addr)); 2397 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 2398 vassert(isIRAtom(cas->expdLo)); 2399 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 2400 vassert(isIRAtom(cas->dataLo)); 2401 cas2 = mkIRCAS( 2402 cas->oldHi, cas->oldLo, cas->end, 2403 fold_Expr(env, subst_Expr(env, cas->addr)), 2404 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) 2405 : NULL, 2406 fold_Expr(env, subst_Expr(env, cas->expdLo)), 2407 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) 2408 : NULL, 2409 fold_Expr(env, subst_Expr(env, cas->dataLo)) 2410 ); 2411 return IRStmt_CAS(cas2); 2412 } 2413 2414 case Ist_LLSC: 2415 vassert(isIRAtom(st->Ist.LLSC.addr)); 2416 if (st->Ist.LLSC.storedata) 2417 vassert(isIRAtom(st->Ist.LLSC.storedata)); 2418 return IRStmt_LLSC( 2419 st->Ist.LLSC.end, 2420 st->Ist.LLSC.result, 2421 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)), 2422 st->Ist.LLSC.storedata 2423 ? 
fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
2424 : NULL
2425 );
2426
2427 case Ist_Dirty: {
2428 Int i;
2429 IRDirty *d, *d2;
2430 d = st->Ist.Dirty.details;
2431 d2 = emptyIRDirty();
2432 *d2 = *d;
2433 d2->args = shallowCopyIRExprVec(d2->args);
2434 if (d2->mFx != Ifx_None) {
2435 vassert(isIRAtom(d2->mAddr));
2436 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
2437 }
2438 vassert(isIRAtom(d2->guard));
2439 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
2440 for (i = 0; d2->args[i]; i++) {
2441 vassert(isIRAtom(d2->args[i]));
2442 d2->args[i] = fold_Expr(env, subst_Expr(env, d2->args[i]));
2443 }
2444 return IRStmt_Dirty(d2);
2445 }
2446
2447 case Ist_IMark:
2448 return IRStmt_IMark(st->Ist.IMark.addr,
2449 st->Ist.IMark.len,
2450 st->Ist.IMark.delta);
2451
2452 case Ist_NoOp:
2453 return IRStmt_NoOp();
2454
2455 case Ist_MBE:
2456 return IRStmt_MBE(st->Ist.MBE.event);
2457
2458 case Ist_Exit: {
2459 IRExpr* fcond;
2460 vassert(isIRAtom(st->Ist.Exit.guard));
2461 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
2462 if (fcond->tag == Iex_Const) {
2463 /* Interesting. The condition on this exit has folded down to
2464 a constant. */
2465 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2466 vassert(fcond->Iex.Const.con->Ico.U1 == False
2467 || fcond->Iex.Const.con->Ico.U1 == True);
2468 if (fcond->Iex.Const.con->Ico.U1 == False) {
2469 /* exit is never going to happen, so dump the statement. */
2470 return IRStmt_NoOp();
2471 } else {
2472 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2473 /* Hmmm. The exit has become unconditional. Leave it
2474 as it is for now, since we'd have to truncate the BB
2475 at this point, which is tricky. Such truncation is
2476 done later by the dead-code elimination pass. */
2477 /* fall out into the reconstruct-the-exit code. */
2478 if (vex_control.iropt_verbosity > 0)
2479 /* really a misuse of vex_control.iropt_verbosity */
2480 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2481 }
2482 }
2483 return IRStmt_Exit(fcond, st->Ist.Exit.jk,
2484 st->Ist.Exit.dst, st->Ist.Exit.offsIP);
2485 }
2486
2487 default:
2488 vex_printf("\n"); ppIRStmt(st);
2489 vpanic("subst_and_fold_Stmt");
2490 }
2491 }
2492
2493
2494 IRSB* cprop_BB ( IRSB* in )
2495 {
2496 Int i;
2497 IRSB* out;
2498 IRStmt* st2;
2499 Int n_tmps = in->tyenv->types_used;
2500 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
2501
2502 out = emptyIRSB();
2503 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2504
2505 /* Set up the env which travels forward. This holds a
2506 substitution, mapping IRTemps to IRExprs. The environment
2507 is to be applied as we move along. Keys are IRTemps.
2508 Values are IRExpr*s.
2509 */
2510 for (i = 0; i < n_tmps; i++)
2511 env[i] = NULL;
2512
2513 /* For each original SSA-form stmt ... */
2514 for (i = 0; i < in->stmts_used; i++) {
2515
2516 /* First apply the substitution to the current stmt. This
2517 propagates in any constants and tmp-tmp assignments
2518 accumulated prior to this point. As part of the
2519 subst_and_fold_Stmt call, also then fold any constant expressions resulting. */
2520
2521 st2 = in->stmts[i];
2522
2523 /* perhaps st2 is already a no-op? */
2524 if (st2->tag == Ist_NoOp) continue;
2525
2526 st2 = subst_and_fold_Stmt( env, st2 );
2527
2528 /* If the statement has been folded into a no-op, forget it. */
2529 if (st2->tag == Ist_NoOp) continue;
2530
2531 /* If the statement assigns to an IRTemp, add it to the running
2532 environment.
This is for the benefit of copy propagation
2533 and to allow sameIRExprs to look through IRTemps. */
2534 if (st2->tag == Ist_WrTmp) {
2535 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
2536 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2537
2538 /* 't1 = t2' -- don't add to BB; will be optimized out */
2539 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) continue;
2540
2541 /* 't = const' && 'const != F64i' -- don't add to BB
2542 Note, we choose not to propagate const when const is an
2543 F64i, so that F64i literals can be CSE'd later. This helps
2544 x86 floating point code generation. */
2545 if (st2->Ist.WrTmp.data->tag == Iex_Const
2546 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) continue;
2547 }
2548
2549 /* Not interesting, copy st2 into the output block. */
2550 addStmtToIRSB( out, st2 );
2551 }
2552
2553 #if STATS_IROPT
2554 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
2555 invocation_count, recursion_count, success_count,
2556 recursion_success_count, max_nodes_visited);
2557 #endif
2558
2559 out->next = subst_Expr( env, in->next );
2560 out->jumpkind = in->jumpkind;
2561 out->offsIP = in->offsIP;
2562 return out;
2563 }
2564
2565
2566 /*---------------------------------------------------------------*/
2567 /*--- Dead code (t = E) removal ---*/
2568 /*---------------------------------------------------------------*/
2569
2570 /* As a side effect, also removes all code following an unconditional
2571 side exit. */
2572
2573 /* The type of the HashHW map is: a map from IRTemp to nothing
2574 -- really just operating as a set of IRTemps.
2575 */
2576
2577 inline
2578 static void addUses_Temp ( Bool* set, IRTemp tmp )
2579 {
2580 set[(Int)tmp] = True;
2581 }
2582
2583 static void addUses_Expr ( Bool* set, IRExpr* e )
2584 {
2585 Int i;
2586 switch (e->tag) {
2587 case Iex_GetI:
2588 addUses_Expr(set, e->Iex.GetI.ix);
2589 return;
2590 case Iex_Mux0X:
2591 addUses_Expr(set, e->Iex.Mux0X.cond);
2592 addUses_Expr(set, e->Iex.Mux0X.expr0);
2593 addUses_Expr(set, e->Iex.Mux0X.exprX);
2594 return;
2595 case Iex_CCall:
2596 for (i = 0; e->Iex.CCall.args[i]; i++)
2597 addUses_Expr(set, e->Iex.CCall.args[i]);
2598 return;
2599 case Iex_Load:
2600 addUses_Expr(set, e->Iex.Load.addr);
2601 return;
2602 case Iex_Qop:
2603 addUses_Expr(set, e->Iex.Qop.details->arg1);
2604 addUses_Expr(set, e->Iex.Qop.details->arg2);
2605 addUses_Expr(set, e->Iex.Qop.details->arg3);
2606 addUses_Expr(set, e->Iex.Qop.details->arg4);
2607 return;
2608 case Iex_Triop:
2609 addUses_Expr(set, e->Iex.Triop.details->arg1);
2610 addUses_Expr(set, e->Iex.Triop.details->arg2);
2611 addUses_Expr(set, e->Iex.Triop.details->arg3);
2612 return;
2613 case Iex_Binop:
2614 addUses_Expr(set, e->Iex.Binop.arg1);
2615 addUses_Expr(set, e->Iex.Binop.arg2);
2616 return;
2617 case Iex_Unop:
2618 addUses_Expr(set, e->Iex.Unop.arg);
2619 return;
2620 case Iex_RdTmp:
2621 addUses_Temp(set, e->Iex.RdTmp.tmp);
2622 return;
2623 case Iex_Const:
2624 case Iex_Get:
2625 return;
2626 default:
2627 vex_printf("\n");
2628 ppIRExpr(e);
2629 vpanic("addUses_Expr");
2630 }
2631 }
2632
2633 static void addUses_Stmt ( Bool* set, IRStmt* st )
2634 {
2635 Int i;
2636 IRDirty* d;
2637 IRCAS* cas;
2638 switch (st->tag) {
2639 case Ist_AbiHint:
2640 addUses_Expr(set, st->Ist.AbiHint.base);
2641 addUses_Expr(set, st->Ist.AbiHint.nia);
2642 return;
2643 case Ist_PutI:
2644 addUses_Expr(set, st->Ist.PutI.details->ix);
2645 addUses_Expr(set, st->Ist.PutI.details->data);
2646 return;
2647 case Ist_WrTmp:
2648
addUses_Expr(set, st->Ist.WrTmp.data); 2649 return; 2650 case Ist_Put: 2651 addUses_Expr(set, st->Ist.Put.data); 2652 return; 2653 case Ist_Store: 2654 addUses_Expr(set, st->Ist.Store.addr); 2655 addUses_Expr(set, st->Ist.Store.data); 2656 return; 2657 case Ist_CAS: 2658 cas = st->Ist.CAS.details; 2659 addUses_Expr(set, cas->addr); 2660 if (cas->expdHi) 2661 addUses_Expr(set, cas->expdHi); 2662 addUses_Expr(set, cas->expdLo); 2663 if (cas->dataHi) 2664 addUses_Expr(set, cas->dataHi); 2665 addUses_Expr(set, cas->dataLo); 2666 return; 2667 case Ist_LLSC: 2668 addUses_Expr(set, st->Ist.LLSC.addr); 2669 if (st->Ist.LLSC.storedata) 2670 addUses_Expr(set, st->Ist.LLSC.storedata); 2671 return; 2672 case Ist_Dirty: 2673 d = st->Ist.Dirty.details; 2674 if (d->mFx != Ifx_None) 2675 addUses_Expr(set, d->mAddr); 2676 addUses_Expr(set, d->guard); 2677 for (i = 0; d->args[i] != NULL; i++) 2678 addUses_Expr(set, d->args[i]); 2679 return; 2680 case Ist_NoOp: 2681 case Ist_IMark: 2682 case Ist_MBE: 2683 return; 2684 case Ist_Exit: 2685 addUses_Expr(set, st->Ist.Exit.guard); 2686 return; 2687 default: 2688 vex_printf("\n"); 2689 ppIRStmt(st); 2690 vpanic("addUses_Stmt"); 2691 } 2692 } 2693 2694 2695 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */ 2696 static Bool isZeroU1 ( IRExpr* e ) 2697 { 2698 return toBool( e->tag == Iex_Const 2699 && e->Iex.Const.con->tag == Ico_U1 2700 && e->Iex.Const.con->Ico.U1 == False ); 2701 } 2702 2703 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */ 2704 static Bool isOneU1 ( IRExpr* e ) 2705 { 2706 return toBool( e->tag == Iex_Const 2707 && e->Iex.Const.con->tag == Ico_U1 2708 && e->Iex.Const.con->Ico.U1 == True ); 2709 } 2710 2711 2712 /* Note, this destructively modifies the given IRSB. */ 2713 2714 /* Scan backwards through statements, carrying a set of IRTemps which 2715 are known to be used after the current point. On encountering 't = 2716 E', delete the binding if it is not used. Otherwise, add any temp 2717 uses to the set and keep on moving backwards. 2718 2719 As an enhancement, the first (backwards) pass searches for IR exits 2720 with always-taken conditions and notes the location of the earliest 2721 one in the block. If any such are found, a second pass copies the 2722 exit destination and jump kind to the bb-end. Then, the exit and 2723 all statements following it are turned into no-ops. 2724 */ 2725 2726 /* notstatic */ void do_deadcode_BB ( IRSB* bb ) 2727 { 2728 Int i, i_unconditional_exit; 2729 Int n_tmps = bb->tyenv->types_used; 2730 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool)); 2731 IRStmt* st; 2732 2733 for (i = 0; i < n_tmps; i++) 2734 set[i] = False; 2735 2736 /* start off by recording IRTemp uses in the next field. */ 2737 addUses_Expr(set, bb->next); 2738 2739 /* First pass */ 2740 2741 /* Work backwards through the stmts */ 2742 i_unconditional_exit = -1; 2743 for (i = bb->stmts_used-1; i >= 0; i--) { 2744 st = bb->stmts[i]; 2745 if (st->tag == Ist_NoOp) 2746 continue; 2747 /* take note of any unconditional exits */ 2748 if (st->tag == Ist_Exit 2749 && isOneU1(st->Ist.Exit.guard)) 2750 i_unconditional_exit = i; 2751 if (st->tag == Ist_WrTmp 2752 && set[(Int)(st->Ist.WrTmp.tmp)] == False) { 2753 /* it's an IRTemp which never got used. Delete it. 
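For example (illustrative): on meeting 't5 = Add32(t1,t2)' here with
set[t5] still False, no later statement nor bb->next reads t5, so
the assignment is overwritten with a NoOp just below.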
*/ 2754 if (DEBUG_IROPT) { 2755 vex_printf("DEAD: "); 2756 ppIRStmt(st); 2757 vex_printf("\n"); 2758 } 2759 bb->stmts[i] = IRStmt_NoOp(); 2760 } 2761 else 2762 if (st->tag == Ist_Dirty 2763 && st->Ist.Dirty.details->guard 2764 && isZeroU1(st->Ist.Dirty.details->guard)) { 2765 /* This is a dirty helper which will never get called. 2766 Delete it. */ 2767 bb->stmts[i] = IRStmt_NoOp(); 2768 } 2769 else { 2770 /* Note any IRTemp uses made by the current statement. */ 2771 addUses_Stmt(set, st); 2772 } 2773 } 2774 2775 /* Optional second pass: if any unconditional exits were found, 2776 delete them and all following statements. */ 2777 2778 if (i_unconditional_exit != -1) { 2779 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", 2780 i_unconditional_exit); 2781 vassert(i_unconditional_exit >= 0 2782 && i_unconditional_exit < bb->stmts_used); 2783 bb->next 2784 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); 2785 bb->jumpkind 2786 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; 2787 bb->offsIP 2788 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; 2789 for (i = i_unconditional_exit; i < bb->stmts_used; i++) 2790 bb->stmts[i] = IRStmt_NoOp(); 2791 } 2792 } 2793 2794 2795 /*---------------------------------------------------------------*/ 2796 /*--- Specialisation of helper function calls, in ---*/ 2797 /*--- collaboration with the front end ---*/ 2798 /*---------------------------------------------------------------*/ 2799 2800 static 2801 IRSB* spec_helpers_BB( 2802 IRSB* bb, 2803 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int) 2804 ) 2805 { 2806 Int i; 2807 IRStmt* st; 2808 IRExpr* ex; 2809 Bool any = False; 2810 2811 for (i = bb->stmts_used-1; i >= 0; i--) { 2812 st = bb->stmts[i]; 2813 2814 if (st->tag != Ist_WrTmp 2815 || st->Ist.WrTmp.data->tag != Iex_CCall) 2816 continue; 2817 2818 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, 2819 st->Ist.WrTmp.data->Iex.CCall.args, 2820 &bb->stmts[0], i ); 2821 if (!ex) 2822 /* the front end can't think of a suitable replacement */ 2823 continue; 2824 2825 /* We got something better. Install it in the bb. */ 2826 any = True; 2827 bb->stmts[i] 2828 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 2829 2830 if (0) { 2831 vex_printf("SPEC: "); 2832 ppIRExpr(st->Ist.WrTmp.data); 2833 vex_printf(" --> "); 2834 ppIRExpr(ex); 2835 vex_printf("\n"); 2836 } 2837 } 2838 2839 if (any) 2840 bb = flatten_BB(bb); 2841 return bb; 2842 } 2843 2844 2845 /*---------------------------------------------------------------*/ 2846 /*--- Determination of guest state aliasing relationships ---*/ 2847 /*---------------------------------------------------------------*/ 2848 2849 /* These are helper functions for CSE and GetI/PutI transformations. 2850 2851 Determine, to the extent possible, the relationship between two 2852 guest state accesses. The possible outcomes are: 2853 2854 * Exact alias. These two accesses denote precisely the same 2855 piece of the guest state. 2856 2857 * Definitely no alias. These two accesses are guaranteed not to 2858 overlap any part of the guest state. 2859 2860 * Unknown -- if neither of the above can be established. 2861 2862 If in doubt, return Unknown. */ 2863 2864 typedef 2865 enum { ExactAlias, NoAlias, UnknownAlias } 2866 GSAliasing; 2867 2868 2869 /* Produces the alias relation between an indexed guest 2870 state access and a non-indexed access. 
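For example (illustrative): if descr1 spans guest offsets
0x100..0x13F and the fixed access is 4 bytes at offset 0x200, the
ranges are disjoint and the result is NoAlias; once the ranges
overlap we conservatively answer UnknownAlias.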
*/
2871
2872 static
2873 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2874 Int offset2, IRType ty2 )
2875 {
2876 UInt minoff1, maxoff1, minoff2, maxoff2;
2877
2878 getArrayBounds( descr1, &minoff1, &maxoff1 );
2879 minoff2 = offset2;
2880 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2881
2882 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2883 return NoAlias;
2884
2885 /* Could probably do better here if required. For the moment
2886 however just claim not to know anything more. */
2887 return UnknownAlias;
2888 }
2889
2890
2891 /* Produces the alias relation between two indexed guest state
2892 accesses. */
2893
2894 static
2895 GSAliasing getAliasingRelation_II (
2896 IRRegArray* descr1, IRExpr* ix1, Int bias1,
2897 IRRegArray* descr2, IRExpr* ix2, Int bias2
2898 )
2899 {
2900 UInt minoff1, maxoff1, minoff2, maxoff2;
2901 Int iters;
2902
2903 /* First try hard to show they don't alias. */
2904 getArrayBounds( descr1, &minoff1, &maxoff1 );
2905 getArrayBounds( descr2, &minoff2, &maxoff2 );
2906 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2907 return NoAlias;
2908
2909 /* So the two arrays at least partially overlap. To get any
2910 further we'll have to be sure that the descriptors are
2911 identical. */
2912 if (!eqIRRegArray(descr1, descr2))
2913 return UnknownAlias;
2914
2915 /* The descriptors are identical. Now the only difference can be
2916 in the index expressions. If they cannot be shown to be
2917 identical, we have to say we don't know what the aliasing
2918 relation will be. Now, since the IR is flattened, the index
2919 expressions should be atoms -- either consts or tmps. So that
2920 makes the comparison simple. */
2921 vassert(isIRAtom(ix1));
2922 vassert(isIRAtom(ix2));
2923 if (!eqIRAtom(ix1,ix2))
2924 return UnknownAlias;
2925
2926 /* Ok, the index expressions are identical. So now the only way
2927 they can be different is in the bias. Normalise this
2928 paranoidly, to reliably establish equality/non-equality. */
2929
2930 /* So now we know that the GetI and PutI index the same array
2931 with the same base. Are the offsets the same, modulo the
2932 array size? Do this paranoidly. */
2933 vassert(descr1->nElems == descr2->nElems);
2934 vassert(descr1->elemTy == descr2->elemTy);
2935 vassert(descr1->base == descr2->base);
2936 iters = 0;
2937 while (bias1 < 0 || bias2 < 0) {
2938 bias1 += descr1->nElems;
2939 bias2 += descr1->nElems;
2940 iters++;
2941 if (iters > 10)
2942 vpanic("getAliasingRelation: iters");
2943 }
2944 vassert(bias1 >= 0 && bias2 >= 0);
2945 bias1 %= descr1->nElems;
2946 bias2 %= descr1->nElems;
2947 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2948 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2949
2950 /* Finally, bias1 and bias2 are normalised into the range
2951 0 .. descr1->nElems - 1. And so we can establish
2952 equality/non-equality. */
2953
2954 return bias1==bias2 ? ExactAlias : NoAlias;
2955 }
2956
2957
2958 /*---------------------------------------------------------------*/
2959 /*--- Common Subexpression Elimination ---*/
2960 /*---------------------------------------------------------------*/
2961
2962 /* Expensive in time and space.
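A worked example (illustrative):
   t12 = Sub32(t3,t4)
   ...
   t19 = Sub32(t3,t4)
The second Sub32 matches the available expression already bound to
t12, so the second statement is rewritten as 't19 = t12' and the
binding t19 -> t12 is added to the tmp-to-tmp environment, so that
subsequent available expressions mentioning t19 are re-expressed in
terms of t12.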
*/
2963
2964 /* Uses two environments:
2965 an IRTemp -> IRTemp mapping
2966 a mapping from AvailExpr* to IRTemp
2967 */
2968
2969 typedef
2970 struct {
2971 enum { TCc, TCt } tag;
2972 union { IRTemp tmp; IRConst* con; } u;
2973 }
2974 TmpOrConst;
2975
2976 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
2977 {
2978 if (tc1->tag != tc2->tag)
2979 return False;
2980 switch (tc1->tag) {
2981 case TCc:
2982 return eqIRConst(tc1->u.con, tc2->u.con);
2983 case TCt:
2984 return tc1->u.tmp == tc2->u.tmp;
2985 default:
2986 vpanic("eqTmpOrConst");
2987 }
2988 }
2989
2990 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
2991 {
2992 Bool eq = cee1->addr == cee2->addr;
2993 if (eq) {
2994 vassert(cee1->regparms == cee2->regparms);
2995 vassert(cee1->mcx_mask == cee2->mcx_mask);
2996 /* Names should be the same too, but we don't bother to
2997 check. */
2998 }
2999 return eq;
3000 }
3001
3002 /* Convert a NULL-terminated IRExpr* vector to an array of
3003 TmpOrConsts, and a length. */
3004 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3005 /*OUT*/Int* nOuts,
3006 IRExpr** ins )
3007 {
3008 Int i, n;
3009 /* We have to make two passes, one to count, one to copy. */
3010 for (n = 0; ins[n]; n++)
3011 ;
3012 *outs = LibVEX_Alloc(n * sizeof(TmpOrConst));
3013 *nOuts = n;
3014 /* and now copy .. */
3015 for (i = 0; i < n; i++) {
3016 IRExpr* arg = ins[i];
3017 TmpOrConst* dst = &(*outs)[i];
3018 if (arg->tag == Iex_RdTmp) {
3019 dst->tag = TCt;
3020 dst->u.tmp = arg->Iex.RdTmp.tmp;
3021 }
3022 else if (arg->tag == Iex_Const) {
3023 dst->tag = TCc;
3024 dst->u.con = arg->Iex.Const.con;
3025 }
3026 else {
3027 /* Failure of this is serious; it means that the presented arg
3028 isn't an IR atom, as it should be. */
3029 vpanic("irExprVec_to_TmpOrConsts");
3030 }
3031 }
3032 }
3033
3034 typedef
3035 struct {
3036 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt, CCall } tag;
3037 union {
3038 /* unop(tmp) */
3039 struct {
3040 IROp op;
3041 IRTemp arg;
3042 } Ut;
3043 /* binop(tmp,tmp) */
3044 struct {
3045 IROp op;
3046 IRTemp arg1;
3047 IRTemp arg2;
3048 } Btt;
3049 /* binop(tmp,const) */
3050 struct {
3051 IROp op;
3052 IRTemp arg1;
3053 IRConst con2;
3054 } Btc;
3055 /* binop(const,tmp) */
3056 struct {
3057 IROp op;
3058 IRConst con1;
3059 IRTemp arg2;
3060 } Bct;
3061 /* F64i-style const */
3062 struct {
3063 ULong f64i;
3064 } Cf64i;
3065 /* Mux0X(tmp,tmp,tmp) */
3066 struct {
3067 IRTemp co;
3068 IRTemp e0;
3069 IRTemp eX;
3070 } Mttt;
3071 /* GetI(descr,tmp,bias)*/
3072 struct {
3073 IRRegArray* descr;
3074 IRTemp ix;
3075 Int bias;
3076 } GetIt;
3077 /* Clean helper call */
3078 struct {
3079 IRCallee* cee;
3080 TmpOrConst* args;
3081 Int nArgs;
3082 IRType retty;
3083 } CCall;
3084 } u;
3085 }
3086 AvailExpr;
3087
3088 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3089 {
3090 if (LIKELY(a1->tag != a2->tag))
3091 return False;
3092 switch (a1->tag) {
3093 case Ut:
3094 return toBool(
3095 a1->u.Ut.op == a2->u.Ut.op
3096 && a1->u.Ut.arg == a2->u.Ut.arg);
3097 case Btt:
3098 return toBool(
3099 a1->u.Btt.op == a2->u.Btt.op
3100 && a1->u.Btt.arg1 == a2->u.Btt.arg1
3101 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3102 case Btc:
3103 return toBool(
3104 a1->u.Btc.op == a2->u.Btc.op
3105 && a1->u.Btc.arg1 == a2->u.Btc.arg1
3106 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3107 case Bct:
3108 return toBool(
3109 a1->u.Bct.op == a2->u.Bct.op
3110 && a1->u.Bct.arg2 == a2->u.Bct.arg2
3111 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3112 case Cf64i:
3113
return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i); 3114 case Mttt: 3115 return toBool(a1->u.Mttt.co == a2->u.Mttt.co 3116 && a1->u.Mttt.e0 == a2->u.Mttt.e0 3117 && a1->u.Mttt.eX == a2->u.Mttt.eX); 3118 case GetIt: 3119 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 3120 && a1->u.GetIt.ix == a2->u.GetIt.ix 3121 && a1->u.GetIt.bias == a2->u.GetIt.bias); 3122 case CCall: { 3123 Int i, n; 3124 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs 3125 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee); 3126 if (eq) { 3127 n = a1->u.CCall.nArgs; 3128 for (i = 0; i < n; i++) { 3129 if (!eqTmpOrConst( &a1->u.CCall.args[i], 3130 &a2->u.CCall.args[i] )) { 3131 eq = False; 3132 break; 3133 } 3134 } 3135 } 3136 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty); 3137 return eq; 3138 } 3139 default: vpanic("eq_AvailExpr"); 3140 } 3141 } 3142 3143 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 3144 { 3145 IRConst* con; 3146 switch (ae->tag) { 3147 case Ut: 3148 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) ); 3149 case Btt: 3150 return IRExpr_Binop( ae->u.Btt.op, 3151 IRExpr_RdTmp(ae->u.Btt.arg1), 3152 IRExpr_RdTmp(ae->u.Btt.arg2) ); 3153 case Btc: 3154 con = LibVEX_Alloc(sizeof(IRConst)); 3155 *con = ae->u.Btc.con2; 3156 return IRExpr_Binop( ae->u.Btc.op, 3157 IRExpr_RdTmp(ae->u.Btc.arg1), 3158 IRExpr_Const(con) ); 3159 case Bct: 3160 con = LibVEX_Alloc(sizeof(IRConst)); 3161 *con = ae->u.Bct.con1; 3162 return IRExpr_Binop( ae->u.Bct.op, 3163 IRExpr_Const(con), 3164 IRExpr_RdTmp(ae->u.Bct.arg2) ); 3165 case Cf64i: 3166 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i)); 3167 case Mttt: 3168 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co), 3169 IRExpr_RdTmp(ae->u.Mttt.e0), 3170 IRExpr_RdTmp(ae->u.Mttt.eX)); 3171 case GetIt: 3172 return IRExpr_GetI(ae->u.GetIt.descr, 3173 IRExpr_RdTmp(ae->u.GetIt.ix), 3174 ae->u.GetIt.bias); 3175 case CCall: { 3176 Int i, n = ae->u.CCall.nArgs; 3177 vassert(n >= 0); 3178 IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*)); 3179 vec[n] = NULL; 3180 for (i = 0; i < n; i++) { 3181 TmpOrConst* tc = &ae->u.CCall.args[i]; 3182 if (tc->tag == TCc) { 3183 vec[i] = IRExpr_Const(tc->u.con); 3184 } 3185 else if (tc->tag == TCt) { 3186 vec[i] = IRExpr_RdTmp(tc->u.tmp); 3187 } 3188 else vpanic("availExpr_to_IRExpr:CCall-arg"); 3189 } 3190 return IRExpr_CCall(ae->u.CCall.cee, 3191 ae->u.CCall.retty, 3192 vec); 3193 } 3194 default: 3195 vpanic("availExpr_to_IRExpr"); 3196 } 3197 } 3198 3199 inline 3200 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp ) 3201 { 3202 HWord res; 3203 /* env :: IRTemp -> IRTemp */ 3204 if (lookupHHW( env, &res, (HWord)tmp )) 3205 return (IRTemp)res; 3206 else 3207 return tmp; 3208 } 3209 3210 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae ) 3211 { 3212 /* env :: IRTemp -> IRTemp */ 3213 switch (ae->tag) { 3214 case Ut: 3215 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg ); 3216 break; 3217 case Btt: 3218 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 ); 3219 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 ); 3220 break; 3221 case Btc: 3222 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 ); 3223 break; 3224 case Bct: 3225 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 ); 3226 break; 3227 case Cf64i: 3228 break; 3229 case Mttt: 3230 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co ); 3231 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 ); 3232 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX ); 3233 break; 3234 case 
GetIt: 3235 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix ); 3236 break; 3237 case CCall: { 3238 Int i, n = ae->u.CCall.nArgs;; 3239 for (i = 0; i < n; i++) { 3240 TmpOrConst* tc = &ae->u.CCall.args[i]; 3241 if (tc->tag == TCt) { 3242 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp ); 3243 } 3244 } 3245 break; 3246 } 3247 default: 3248 vpanic("subst_AvailExpr"); 3249 } 3250 } 3251 3252 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e ) 3253 { 3254 AvailExpr* ae; 3255 3256 if (e->tag == Iex_Unop 3257 && e->Iex.Unop.arg->tag == Iex_RdTmp) { 3258 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3259 ae->tag = Ut; 3260 ae->u.Ut.op = e->Iex.Unop.op; 3261 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp; 3262 return ae; 3263 } 3264 3265 if (e->tag == Iex_Binop 3266 && e->Iex.Binop.arg1->tag == Iex_RdTmp 3267 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3268 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3269 ae->tag = Btt; 3270 ae->u.Btt.op = e->Iex.Binop.op; 3271 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3272 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3273 return ae; 3274 } 3275 3276 if (e->tag == Iex_Binop 3277 && e->Iex.Binop.arg1->tag == Iex_RdTmp 3278 && e->Iex.Binop.arg2->tag == Iex_Const) { 3279 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3280 ae->tag = Btc; 3281 ae->u.Btc.op = e->Iex.Binop.op; 3282 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3283 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con); 3284 return ae; 3285 } 3286 3287 if (e->tag == Iex_Binop 3288 && e->Iex.Binop.arg1->tag == Iex_Const 3289 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3290 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3291 ae->tag = Bct; 3292 ae->u.Bct.op = e->Iex.Binop.op; 3293 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3294 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 3295 return ae; 3296 } 3297 3298 if (e->tag == Iex_Const 3299 && e->Iex.Const.con->tag == Ico_F64i) { 3300 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3301 ae->tag = Cf64i; 3302 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 3303 return ae; 3304 } 3305 3306 if (e->tag == Iex_Mux0X 3307 && e->Iex.Mux0X.cond->tag == Iex_RdTmp 3308 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp 3309 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) { 3310 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3311 ae->tag = Mttt; 3312 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp; 3313 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp; 3314 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp; 3315 return ae; 3316 } 3317 3318 if (e->tag == Iex_GetI 3319 && e->Iex.GetI.ix->tag == Iex_RdTmp) { 3320 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3321 ae->tag = GetIt; 3322 ae->u.GetIt.descr = e->Iex.GetI.descr; 3323 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 3324 ae->u.GetIt.bias = e->Iex.GetI.bias; 3325 return ae; 3326 } 3327 3328 if (e->tag == Iex_CCall) { 3329 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3330 ae->tag = CCall; 3331 /* Ok to share only the cee, since it is immutable. */ 3332 ae->u.CCall.cee = e->Iex.CCall.cee; 3333 ae->u.CCall.retty = e->Iex.CCall.retty; 3334 /* irExprVec_to_TmpOrConsts will assert if the args are 3335 neither tmps nor constants, but that's ok .. that's all they 3336 should be. */ 3337 irExprVec_to_TmpOrConsts( 3338 &ae->u.CCall.args, &ae->u.CCall.nArgs, 3339 e->Iex.CCall.args 3340 ); 3341 return ae; 3342 } 3343 3344 return NULL; 3345 } 3346 3347 3348 /* The BB is modified in-place. Returns True if any changes were 3349 made. 
*/
3350
3351 static Bool do_cse_BB ( IRSB* bb )
3352 {
3353 Int i, j, paranoia;
3354 IRTemp t, q;
3355 IRStmt* st;
3356 AvailExpr* eprime;
3357 AvailExpr* ae;
3358 Bool invalidate;
3359 Bool anyDone = False;
3360
3361 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3362 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3363
3364 vassert(sizeof(IRTemp) <= sizeof(HWord));
3365
3366 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3367
3368 /* Iterate forwards over the stmts.
3369 On seeing "t = E", where E is one of the AvailExpr forms:
3370 let E' = apply tenv substitution to E
3371 search aenv for E'
3372 if a mapping E' -> q is found,
3373 replace this stmt by "t = q"
3374 and add binding t -> q to tenv
3375 else
3376 add binding E' -> t to aenv
3377 replace this stmt by "t = E'"
3378
3379 Other statements are only interesting to the extent that they
3380 might invalidate some of the expressions in aenv. So there is
3381 an invalidate-bindings check for each statement seen.
3382 */
3383 for (i = 0; i < bb->stmts_used; i++) {
3384 st = bb->stmts[i];
3385
3386 /* ------ BEGIN invalidate aenv bindings ------ */
3387 /* This is critical: remove from aenv any E' -> .. bindings
3388 which might be invalidated by this statement. The only
3389 vulnerable kind of binding is the GetI kind.
3390 Dirty call - dump (paranoia level -> 2)
3391 Store - dump (ditto)
3392 Put, PutI - dump unless no-overlap is proven (.. -> 1)
3393 Uses getAliasingRelation_IC and getAliasingRelation_II
3394 to do the no-overlap assessments needed for Put/PutI.
3395 */
3396 switch (st->tag) {
3397 case Ist_Dirty: case Ist_Store: case Ist_MBE:
3398 case Ist_CAS: case Ist_LLSC:
3399 paranoia = 2; break;
3400 case Ist_Put: case Ist_PutI:
3401 paranoia = 1; break;
3402 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3403 case Ist_WrTmp: case Ist_Exit:
3404 paranoia = 0; break;
3405 default:
3406 vpanic("do_cse_BB(1)");
3407 }
3408
3409 if (paranoia > 0) {
3410 for (j = 0; j < aenv->used; j++) {
3411 if (!aenv->inuse[j])
3412 continue;
3413 ae = (AvailExpr*)aenv->key[j];
3414 if (ae->tag != GetIt)
3415 continue;
3416 invalidate = False;
3417 if (paranoia >= 2) {
3418 invalidate = True;
3419 } else {
3420 vassert(paranoia == 1);
3421 if (st->tag == Ist_Put) {
3422 if (getAliasingRelation_IC(
3423 ae->u.GetIt.descr,
3424 IRExpr_RdTmp(ae->u.GetIt.ix),
3425 st->Ist.Put.offset,
3426 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3427 ) != NoAlias)
3428 invalidate = True;
3429 }
3430 else
3431 if (st->tag == Ist_PutI) {
3432 IRPutI *puti = st->Ist.PutI.details;
3433 if (getAliasingRelation_II(
3434 ae->u.GetIt.descr,
3435 IRExpr_RdTmp(ae->u.GetIt.ix),
3436 ae->u.GetIt.bias,
3437 puti->descr,
3438 puti->ix,
3439 puti->bias
3440 ) != NoAlias)
3441 invalidate = True;
3442 }
3443 else
3444 vpanic("do_cse_BB(2)");
3445 }
3446
3447 if (invalidate) {
3448 aenv->inuse[j] = False;
3449 aenv->key[j] = (HWord)NULL; /* be sure */
3450 }
3451 } /* for j */
3452 } /* paranoia > 0 */
3453
3454 /* ------ END invalidate aenv bindings ------ */
3455
3456 /* ignore not-interestings */
3457 if (st->tag != Ist_WrTmp)
3458 continue;
3459
3460 t = st->Ist.WrTmp.tmp;
3461 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
3462 /* ignore if not of AvailExpr form */
3463 if (!eprime)
3464 continue;
3465
3466 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3467
3468 /* apply tenv */
3469 subst_AvailExpr( tenv, eprime );
3470
3471 /* search aenv for eprime, unfortunately the hard way */
3472 for (j = 0; j < aenv->used; j++)
3473 if
(aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
3474 break;
3475
3476 if (j < aenv->used) {
3477 /* A binding E' -> q was found. Replace stmt by "t = q" and
3478 note the t->q binding in tenv. */
3479 /* (this is the core of the CSE action) */
3480 q = (IRTemp)aenv->val[j];
3481 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
3482 addToHHW( tenv, (HWord)t, (HWord)q );
3483 anyDone = True;
3484 } else {
3485 /* No binding was found, so instead we add E' -> t to our
3486 collection of available expressions, replace this stmt
3487 with "t = E'", and move on. */
3488 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
3489 addToHHW( aenv, (HWord)eprime, (HWord)t );
3490 }
3491 }
3492
3493 /*
3494 ppIRSB(bb);
3495 sanityCheckIRSB(bb, Ity_I32);
3496 vex_printf("\n\n");
3497 */
3498 return anyDone;
3499 }
3500
3501
3502 /*---------------------------------------------------------------*/
3503 /*--- Add32/Sub32 chain collapsing ---*/
3504 /*---------------------------------------------------------------*/
3505
3506 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
3507
3508 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
3509 yes, set *tmp and *i32 appropriately. *i32 is set as if the
3510 root node is Add32, not Sub32. */
3511
3512 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
3513 {
3514 if (e->tag != Iex_Binop)
3515 return False;
3516 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
3517 return False;
3518 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
3519 return False;
3520 if (e->Iex.Binop.arg2->tag != Iex_Const)
3521 return False;
3522 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3523 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
3524 if (e->Iex.Binop.op == Iop_Sub32)
3525 *i32 = -*i32;
3526 return True;
3527 }
3528
3529
3530 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
3531 other tmp2. Scan backwards from the specified start point -- an
3532 optimisation. */
3533
3534 static Bool collapseChain ( IRSB* bb, Int startHere,
3535 IRTemp tmp,
3536 IRTemp* tmp2, Int* i32 )
3537 {
3538 Int j, ii;
3539 IRTemp vv;
3540 IRStmt* st;
3541 IRExpr* e;
3542
3543 /* the (var, con) pair contains the current 'representation' for
3544 'tmp'. We start with 'tmp + 0'. */
3545 IRTemp var = tmp;
3546 Int con = 0;
3547
3548 /* Scan backwards to see if tmp can be replaced by some other tmp
3549 +/- a constant. */
3550 for (j = startHere; j >= 0; j--) {
3551 st = bb->stmts[j];
3552 if (st->tag != Ist_WrTmp)
3553 continue;
3554 if (st->Ist.WrTmp.tmp != var)
3555 continue;
3556 e = st->Ist.WrTmp.data;
3557 if (!isAdd32OrSub32(e, &vv, &ii))
3558 break;
3559 var = vv;
3560 con += ii;
3561 }
3562 if (j == -1)
3563 /* no earlier binding for var .. ill-formed IR */
3564 vpanic("collapseChain");
3565
3566 /* so, did we find anything interesting? */
3567 if (var == tmp)
3568 return False; /* no .. */
3569
3570 *tmp2 = var;
3571 *i32 = con;
3572 return True;
3573 }
3574
3575
3576 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
3577
3578 static void collapse_AddSub_chains_BB ( IRSB* bb )
3579 {
3580 IRStmt *st;
3581 IRTemp var, var2;
3582 Int i, con, con2;
3583
3584 for (i = bb->stmts_used-1; i >= 0; i--) {
3585 st = bb->stmts[i];
3586 if (st->tag == Ist_NoOp)
3587 continue;
3588
3589 /* Try to collapse 't1 = Add32/Sub32(t2, con)'.
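For example (illustrative), given
   t1 = Add32(t0,0x4:I32)
   t2 = Sub32(t1,0x1:I32)
   t3 = Add32(t2,0x2:I32)
collapseChain expresses t2 as t0 + 3, so the final statement is
rewritten below as 't3 = Add32(t0,0x5:I32)'.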
*/ 3590 3591 if (st->tag == Ist_WrTmp 3592 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) { 3593 3594 /* So e1 is of the form Add32(var,con) or Sub32(var,-con). 3595 Find out if var can be expressed as var2 + con2. */ 3596 if (collapseChain(bb, i-1, var, &var2, &con2)) { 3597 if (DEBUG_IROPT) { 3598 vex_printf("replacing1 "); 3599 ppIRStmt(st); 3600 vex_printf(" with "); 3601 } 3602 con2 += con; 3603 bb->stmts[i] 3604 = IRStmt_WrTmp( 3605 st->Ist.WrTmp.tmp, 3606 (con2 >= 0) 3607 ? IRExpr_Binop(Iop_Add32, 3608 IRExpr_RdTmp(var2), 3609 IRExpr_Const(IRConst_U32(con2))) 3610 : IRExpr_Binop(Iop_Sub32, 3611 IRExpr_RdTmp(var2), 3612 IRExpr_Const(IRConst_U32(-con2))) 3613 ); 3614 if (DEBUG_IROPT) { 3615 ppIRStmt(bb->stmts[i]); 3616 vex_printf("\n"); 3617 } 3618 } 3619 3620 continue; 3621 } 3622 3623 /* Try to collapse 't1 = GetI[t2, con]'. */ 3624 3625 if (st->tag == Ist_WrTmp 3626 && st->Ist.WrTmp.data->tag == Iex_GetI 3627 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp 3628 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix 3629 ->Iex.RdTmp.tmp, &var2, &con2)) { 3630 if (DEBUG_IROPT) { 3631 vex_printf("replacing3 "); 3632 ppIRStmt(st); 3633 vex_printf(" with "); 3634 } 3635 con2 += st->Ist.WrTmp.data->Iex.GetI.bias; 3636 bb->stmts[i] 3637 = IRStmt_WrTmp( 3638 st->Ist.WrTmp.tmp, 3639 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr, 3640 IRExpr_RdTmp(var2), 3641 con2)); 3642 if (DEBUG_IROPT) { 3643 ppIRStmt(bb->stmts[i]); 3644 vex_printf("\n"); 3645 } 3646 continue; 3647 } 3648 3649 /* Perhaps st is PutI[t, con] ? */ 3650 IRPutI *puti = st->Ist.PutI.details; 3651 if (st->tag == Ist_PutI 3652 && puti->ix->tag == Iex_RdTmp 3653 && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp, 3654 &var2, &con2)) { 3655 if (DEBUG_IROPT) { 3656 vex_printf("replacing2 "); 3657 ppIRStmt(st); 3658 vex_printf(" with "); 3659 } 3660 con2 += puti->bias; 3661 bb->stmts[i] 3662 = IRStmt_PutI(mkIRPutI(puti->descr, 3663 IRExpr_RdTmp(var2), 3664 con2, 3665 puti->data)); 3666 if (DEBUG_IROPT) { 3667 ppIRStmt(bb->stmts[i]); 3668 vex_printf("\n"); 3669 } 3670 continue; 3671 } 3672 3673 } /* for */ 3674 } 3675 3676 3677 /*---------------------------------------------------------------*/ 3678 /*--- PutI/GetI transformations ---*/ 3679 /*---------------------------------------------------------------*/ 3680 3681 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from 3682 the given starting point to find, if any, a PutI which writes 3683 exactly the same piece of guest state, and so return the expression 3684 that the PutI writes. This is the core of PutI-GetI forwarding. */ 3685 3686 static 3687 IRExpr* findPutI ( IRSB* bb, Int startHere, 3688 IRRegArray* descrG, IRExpr* ixG, Int biasG ) 3689 { 3690 Int j; 3691 IRStmt* st; 3692 GSAliasing relation; 3693 3694 if (0) { 3695 vex_printf("\nfindPutI "); 3696 ppIRRegArray(descrG); 3697 vex_printf(" "); 3698 ppIRExpr(ixG); 3699 vex_printf(" %d\n", biasG); 3700 } 3701 3702 /* Scan backwards in bb from startHere to find a suitable PutI 3703 binding for (descrG, ixG, biasG), if any. */ 3704 3705 for (j = startHere; j >= 0; j--) { 3706 st = bb->stmts[j]; 3707 if (st->tag == Ist_NoOp) 3708 continue; 3709 3710 if (st->tag == Ist_Put) { 3711 /* Non-indexed Put. This can't give a binding, but we do 3712 need to check it doesn't invalidate the search by 3713 overlapping any part of the indexed guest state. 
*/
3714
3715 relation
3716 = getAliasingRelation_IC(
3717 descrG, ixG,
3718 st->Ist.Put.offset,
3719 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
3720
3721 if (relation == NoAlias) {
3722 /* we're OK; keep going */
3723 continue;
3724 } else {
3725 /* relation == UnknownAlias || relation == ExactAlias */
3726 /* If this assertion fails, we've found a Put which writes
3727 an area of guest state which is read by a GetI. Which
3728 is unlikely (although not per se wrong). */
3729 vassert(relation != ExactAlias);
3730 /* This Put potentially writes guest state that the GetI
3731 reads; we must fail. */
3732 return NULL;
3733 }
3734 }
3735
3736 if (st->tag == Ist_PutI) {
3737 IRPutI *puti = st->Ist.PutI.details;
3738
3739 relation = getAliasingRelation_II(
3740 descrG, ixG, biasG,
3741 puti->descr,
3742 puti->ix,
3743 puti->bias
3744 );
3745
3746 if (relation == NoAlias) {
3747 /* This PutI definitely doesn't overlap. Ignore it and
3748 keep going. */
3749 continue; /* the for j loop */
3750 }
3751
3752 if (relation == UnknownAlias) {
3753 /* We don't know if this PutI writes to the same guest
3754 state that the GetI reads, or not. So we have to give up. */
3755 return NULL;
3756 }
3757
3758 /* Otherwise, we've found what we're looking for. */
3759 vassert(relation == ExactAlias);
3760 return puti->data;
3761
3762 } /* if (st->tag == Ist_PutI) */
3763
3764 if (st->tag == Ist_Dirty) {
3765 /* Be conservative. If the dirty call has any guest effects at
3766 all, give up. We could do better -- only give up if there
3767 are any guest writes/modifies. */
3768 if (st->Ist.Dirty.details->nFxState > 0)
3769 return NULL;
3770 }
3771
3772 } /* for */
3773
3774 /* No valid replacement was found. */
3775 return NULL;
3776 }
3777
3778
3779
3780 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3781 that it writes exactly the same piece of guest state) ? Safe
3782 answer: False. */
3783
3784 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3785 {
3786 vassert(pi->tag == Ist_PutI);
3787 if (s2->tag != Ist_PutI)
3788 return False;
3789
3790 IRPutI *p1 = pi->Ist.PutI.details;
3791 IRPutI *p2 = s2->Ist.PutI.details;
3792
3793 return toBool(
3794 getAliasingRelation_II(
3795 p1->descr, p1->ix, p1->bias,
3796 p2->descr, p2->ix, p2->bias
3797 )
3798 == ExactAlias
3799 );
3800 }
3801
3802
3803 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3804 overlap it? Safe answer: True. Note, we could do a lot better
3805 than this if needed. */
3806
3807 static
3808 Bool guestAccessWhichMightOverlapPutI (
3809 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
3810 )
3811 {
3812 GSAliasing relation;
3813 UInt minoffP, maxoffP;
3814
3815 vassert(pi->tag == Ist_PutI);
3816
3817 IRPutI *p1 = pi->Ist.PutI.details;
3818
3819 getArrayBounds(p1->descr, &minoffP, &maxoffP);
3820 switch (s2->tag) {
3821
3822 case Ist_NoOp:
3823 case Ist_IMark:
3824 return False;
3825
3826 case Ist_MBE:
3827 case Ist_AbiHint:
3828 /* just be paranoid ... these should be rare. */
3829 return True;
3830
3831 case Ist_CAS:
3832 /* This is unbelievably lame, but it's probably not
3833 significant from a performance point of view. Really, a
3834 CAS is a load-store op, so it should be safe to say False.
3835 However .. */
3836 return True;
3837
3838 case Ist_Dirty:
3839 /* If the dirty call has any guest effects at all, give up.
3840 Probably could do better.
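-- as in findPutI, strictly only guest writes/modifies would need
to be counted here.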
*/
3841 if (s2->Ist.Dirty.details->nFxState > 0)
3842 return True;
3843 return False;
3844
3845 case Ist_Put:
3846 vassert(isIRAtom(s2->Ist.Put.data));
3847 relation
3848 = getAliasingRelation_IC(
3849 p1->descr, p1->ix,
3850 s2->Ist.Put.offset,
3851 typeOfIRExpr(tyenv,s2->Ist.Put.data)
3852 );
3853 goto have_relation;
3854
3855 case Ist_PutI: {
3856 IRPutI *p2 = s2->Ist.PutI.details;
3857
3858 vassert(isIRAtom(p2->ix));
3859 vassert(isIRAtom(p2->data));
3860 relation
3861 = getAliasingRelation_II(
3862 p1->descr, p1->ix, p1->bias,
3863 p2->descr, p2->ix, p2->bias
3864 );
3865 goto have_relation;
3866 }
3867
3868 case Ist_WrTmp:
3869 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
3870 relation
3871 = getAliasingRelation_II(
3872 p1->descr, p1->ix, p1->bias,
3873 s2->Ist.WrTmp.data->Iex.GetI.descr,
3874 s2->Ist.WrTmp.data->Iex.GetI.ix,
3875 s2->Ist.WrTmp.data->Iex.GetI.bias
3876 );
3877 goto have_relation;
3878 }
3879 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
3880 relation
3881 = getAliasingRelation_IC(
3882 p1->descr, p1->ix,
3883 s2->Ist.WrTmp.data->Iex.Get.offset,
3884 s2->Ist.WrTmp.data->Iex.Get.ty
3885 );
3886 goto have_relation;
3887 }
3888 return False;
3889
3890 case Ist_Store:
3891 vassert(isIRAtom(s2->Ist.Store.addr));
3892 vassert(isIRAtom(s2->Ist.Store.data));
3893 return False;
3894
3895 default:
3896 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
3897 vpanic("guestAccessWhichMightOverlapPutI");
3898 }
3899
3900 have_relation:
3901 if (relation == NoAlias)
3902 return False;
3903 else
3904 return True; /* ExactAlias or UnknownAlias */
3905 }
3906
3907
3908
3909 /* ---------- PutI/GetI transformations main functions --------- */
3910
3911 /* Remove redundant GetIs, to the extent that they can be detected.
3912 bb is modified in-place. */
3913
3914 static
3915 void do_redundant_GetI_elimination ( IRSB* bb )
3916 {
3917 Int i;
3918 IRStmt* st;
3919
3920 for (i = bb->stmts_used-1; i >= 0; i--) {
3921 st = bb->stmts[i];
3922 if (st->tag == Ist_NoOp)
3923 continue;
3924
3925 if (st->tag == Ist_WrTmp
3926 && st->Ist.WrTmp.data->tag == Iex_GetI
3927 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3928 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3929 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
3930 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
3931 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
3932 if (replacement
3933 && isIRAtom(replacement)
3934 /* Make sure we're doing a type-safe transformation! */
3935 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3936 if (DEBUG_IROPT) {
3937 vex_printf("rGI: ");
3938 ppIRExpr(st->Ist.WrTmp.data);
3939 vex_printf(" -> ");
3940 ppIRExpr(replacement);
3941 vex_printf("\n");
3942 }
3943 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3944 }
3945 }
3946 }
3947
3948 }
3949
3950
3951 /* Remove redundant PutIs, to the extent that they can be detected.
3952 bb is modified in-place. */
3953
3954 static
3955 void do_redundant_PutI_elimination ( IRSB* bb )
3956 {
3957 Int i, j;
3958 Bool delete;
3959 IRStmt *st, *stj;
3960
3961 vassert
3962 (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
3963 || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);
3964
3965 for (i = 0; i < bb->stmts_used; i++) {
3966 st = bb->stmts[i];
3967 if (st->tag != Ist_PutI)
3968 continue;
3969 /* Ok, search forwards from here to see if we can find another
3970 PutI which makes this one redundant, while dodging various
3971 hazards.
Search forwards: 3972 * If conditional exit, give up (because anything after that 3973 does not postdominate this put). 3974 * If a Get which might overlap, give up (because this PutI is 3975 not necessarily dead). 3976 * If a Put which is identical, stop with success. 3977 * If a Put which might overlap, but is not identical, give up. 3978 * If a dirty helper call which might write guest state, give up. 3979 * If a Put which definitely doesn't overlap, or any other 3980 kind of stmt, continue. 3981 */ 3982 delete = False; 3983 for (j = i+1; j < bb->stmts_used; j++) { 3984 stj = bb->stmts[j]; 3985 if (stj->tag == Ist_NoOp) 3986 continue; 3987 if (identicalPutIs(st, stj)) { 3988 /* success! */ 3989 delete = True; 3990 break; 3991 } 3992 if (stj->tag == Ist_Exit) 3993 /* give up */ 3994 break; 3995 if (stj->tag == Ist_Dirty) 3996 /* give up; could do better here */ 3997 break; 3998 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj)) 3999 /* give up */ 4000 break; 4001 } 4002 4003 if (delete) { 4004 if (DEBUG_IROPT) { 4005 vex_printf("rPI: "); 4006 ppIRStmt(st); 4007 vex_printf("\n"); 4008 } 4009 bb->stmts[i] = IRStmt_NoOp(); 4010 } 4011 4012 } 4013 } 4014 4015 4016 /*---------------------------------------------------------------*/ 4017 /*--- Loop unrolling ---*/ 4018 /*---------------------------------------------------------------*/ 4019 4020 /* Adjust all tmp values (names) in e by delta. e is destructively 4021 modified. */ 4022 4023 static void deltaIRExpr ( IRExpr* e, Int delta ) 4024 { 4025 Int i; 4026 switch (e->tag) { 4027 case Iex_RdTmp: 4028 e->Iex.RdTmp.tmp += delta; 4029 break; 4030 case Iex_Get: 4031 case Iex_Const: 4032 break; 4033 case Iex_GetI: 4034 deltaIRExpr(e->Iex.GetI.ix, delta); 4035 break; 4036 case Iex_Qop: 4037 deltaIRExpr(e->Iex.Qop.details->arg1, delta); 4038 deltaIRExpr(e->Iex.Qop.details->arg2, delta); 4039 deltaIRExpr(e->Iex.Qop.details->arg3, delta); 4040 deltaIRExpr(e->Iex.Qop.details->arg4, delta); 4041 break; 4042 case Iex_Triop: 4043 deltaIRExpr(e->Iex.Triop.details->arg1, delta); 4044 deltaIRExpr(e->Iex.Triop.details->arg2, delta); 4045 deltaIRExpr(e->Iex.Triop.details->arg3, delta); 4046 break; 4047 case Iex_Binop: 4048 deltaIRExpr(e->Iex.Binop.arg1, delta); 4049 deltaIRExpr(e->Iex.Binop.arg2, delta); 4050 break; 4051 case Iex_Unop: 4052 deltaIRExpr(e->Iex.Unop.arg, delta); 4053 break; 4054 case Iex_Load: 4055 deltaIRExpr(e->Iex.Load.addr, delta); 4056 break; 4057 case Iex_CCall: 4058 for (i = 0; e->Iex.CCall.args[i]; i++) 4059 deltaIRExpr(e->Iex.CCall.args[i], delta); 4060 break; 4061 case Iex_Mux0X: 4062 deltaIRExpr(e->Iex.Mux0X.cond, delta); 4063 deltaIRExpr(e->Iex.Mux0X.expr0, delta); 4064 deltaIRExpr(e->Iex.Mux0X.exprX, delta); 4065 break; 4066 default: 4067 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4068 vpanic("deltaIRExpr"); 4069 } 4070 } 4071 4072 /* Adjust all tmp values (names) in st by delta. st is destructively 4073 modified.
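For example, with delta == 3, the stmt 't2 = Add32(t0,t1)' becomes
   't5 = Add32(t3,t4)': the defined temp and all temps read are renamed
   uniformly.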
*/ 4074 4075 static void deltaIRStmt ( IRStmt* st, Int delta ) 4076 { 4077 Int i; 4078 IRDirty* d; 4079 switch (st->tag) { 4080 case Ist_NoOp: 4081 case Ist_IMark: 4082 case Ist_MBE: 4083 break; 4084 case Ist_AbiHint: 4085 deltaIRExpr(st->Ist.AbiHint.base, delta); 4086 deltaIRExpr(st->Ist.AbiHint.nia, delta); 4087 break; 4088 case Ist_Put: 4089 deltaIRExpr(st->Ist.Put.data, delta); 4090 break; 4091 case Ist_PutI: 4092 deltaIRExpr(st->Ist.PutI.details->ix, delta); 4093 deltaIRExpr(st->Ist.PutI.details->data, delta); 4094 break; 4095 case Ist_WrTmp: 4096 st->Ist.WrTmp.tmp += delta; 4097 deltaIRExpr(st->Ist.WrTmp.data, delta); 4098 break; 4099 case Ist_Exit: 4100 deltaIRExpr(st->Ist.Exit.guard, delta); 4101 break; 4102 case Ist_Store: 4103 deltaIRExpr(st->Ist.Store.addr, delta); 4104 deltaIRExpr(st->Ist.Store.data, delta); 4105 break; 4106 case Ist_CAS: 4107 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID) 4108 st->Ist.CAS.details->oldHi += delta; 4109 st->Ist.CAS.details->oldLo += delta; 4110 deltaIRExpr(st->Ist.CAS.details->addr, delta); 4111 if (st->Ist.CAS.details->expdHi) 4112 deltaIRExpr(st->Ist.CAS.details->expdHi, delta); 4113 deltaIRExpr(st->Ist.CAS.details->expdLo, delta); 4114 if (st->Ist.CAS.details->dataHi) 4115 deltaIRExpr(st->Ist.CAS.details->dataHi, delta); 4116 deltaIRExpr(st->Ist.CAS.details->dataLo, delta); 4117 break; 4118 case Ist_LLSC: 4119 st->Ist.LLSC.result += delta; 4120 deltaIRExpr(st->Ist.LLSC.addr, delta); 4121 if (st->Ist.LLSC.storedata) 4122 deltaIRExpr(st->Ist.LLSC.storedata, delta); 4123 break; 4124 case Ist_Dirty: 4125 d = st->Ist.Dirty.details; 4126 deltaIRExpr(d->guard, delta); 4127 for (i = 0; d->args[i]; i++) 4128 deltaIRExpr(d->args[i], delta); 4129 if (d->tmp != IRTemp_INVALID) 4130 d->tmp += delta; 4131 if (d->mAddr) 4132 deltaIRExpr(d->mAddr, delta); 4133 break; 4134 default: 4135 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 4136 vpanic("deltaIRStmt"); 4137 } 4138 } 4139 4140 4141 /* If possible, return a loop-unrolled version of bb0. The original 4142 is changed. If not possible, return NULL. */ 4143 4144 /* The two schemas considered are: 4145 4146 X: BODY; goto X 4147 4148 which unrolls to (eg) X: BODY;BODY; goto X 4149 4150 and 4151 4152 X: BODY; if (c) goto X; goto Y 4153 which trivially transforms to 4154 X: BODY; if (!c) goto Y; goto X; 4155 so it falls in the scope of the first case. 4156 4157 X and Y must be literal (guest) addresses. 
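E.g. (hypothetical guest addresses):

      0x1000: BODY; if (c) goto 0x1000; goto 0x2000
   becomes
      0x1000: BODY; if (!c) goto 0x2000; goto 0x1000

   after which the block ends with a literal jump to itself, as in the
   first schema.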
4158 */ 4159 4160 static Int calc_unroll_factor( IRSB* bb ) 4161 { 4162 Int n_stmts, i; 4163 4164 n_stmts = 0; 4165 for (i = 0; i < bb->stmts_used; i++) { 4166 if (bb->stmts[i]->tag != Ist_NoOp) 4167 n_stmts++; 4168 } 4169 4170 if (n_stmts <= vex_control.iropt_unroll_thresh/8) { 4171 if (vex_control.iropt_verbosity > 0) 4172 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n", 4173 n_stmts, 8* n_stmts); 4174 return 8; 4175 } 4176 if (n_stmts <= vex_control.iropt_unroll_thresh/4) { 4177 if (vex_control.iropt_verbosity > 0) 4178 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n", 4179 n_stmts, 4* n_stmts); 4180 return 4; 4181 } 4182 4183 if (n_stmts <= vex_control.iropt_unroll_thresh/2) { 4184 if (vex_control.iropt_verbosity > 0) 4185 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n", 4186 n_stmts, 2* n_stmts); 4187 return 2; 4188 } 4189 4190 if (vex_control.iropt_verbosity > 0) 4191 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts); 4192 4193 return 1; 4194 } 4195 4196 4197 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr ) 4198 { 4199 Int i, j, jmax, n_vars; 4200 Bool xxx_known; 4201 Addr64 xxx_value, yyy_value; 4202 IRExpr* udst; 4203 IRStmt* st; 4204 IRConst* con; 4205 IRSB *bb1, *bb2; 4206 Int unroll_factor; 4207 4208 if (vex_control.iropt_unroll_thresh <= 0) 4209 return NULL; 4210 4211 /* First off, figure out if we can unroll this loop. Do this 4212 without modifying bb0. */ 4213 4214 if (bb0->jumpkind != Ijk_Boring) 4215 return NULL; 4216 4217 xxx_known = False; 4218 xxx_value = 0; 4219 4220 /* Extract the next-guest address. If it isn't a literal, we 4221 have to give up. */ 4222 4223 udst = bb0->next; 4224 if (udst->tag == Iex_Const 4225 && (udst->Iex.Const.con->tag == Ico_U32 4226 || udst->Iex.Const.con->tag == Ico_U64)) { 4227 /* The BB ends in a jump to a literal location. */ 4228 xxx_known = True; 4229 xxx_value = udst->Iex.Const.con->tag == Ico_U64 4230 ? udst->Iex.Const.con->Ico.U64 4231 : (Addr64)(udst->Iex.Const.con->Ico.U32); 4232 } 4233 4234 if (!xxx_known) 4235 return NULL; 4236 4237 /* Now we know the BB ends in a jump to a literal location. If 4238 it's a jump to itself (viz, idiom #1), move directly to the 4239 unrolling stage, first cloning the bb so the original isn't 4240 modified. */ 4241 if (xxx_value == my_addr) { 4242 unroll_factor = calc_unroll_factor( bb0 ); 4243 if (unroll_factor < 2) 4244 return NULL; 4245 bb1 = deepCopyIRSB( bb0 ); 4246 bb0 = NULL; 4247 udst = NULL; /* is now invalid */ 4248 goto do_unroll; 4249 } 4250 4251 /* Search for the second idiomatic form: 4252 X: BODY; if (c) goto X; goto Y 4253 We know Y, but need to establish that the last stmt 4254 is 'if (c) goto X'. 4255 */ 4256 yyy_value = xxx_value; 4257 for (i = bb0->stmts_used-1; i >= 0; i--) 4258 if (bb0->stmts[i]) 4259 break; 4260 4261 if (i < 0) 4262 return NULL; /* block with no stmts. Strange. */ 4263 4264 st = bb0->stmts[i]; 4265 if (st->tag != Ist_Exit) 4266 return NULL; 4267 if (st->Ist.Exit.jk != Ijk_Boring) 4268 return NULL; 4269 4270 con = st->Ist.Exit.dst; 4271 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 4272 4273 xxx_value = con->tag == Ico_U64 4274 ? st->Ist.Exit.dst->Ico.U64 4275 : (Addr64)(st->Ist.Exit.dst->Ico.U32); 4276 4277 /* If this assertion fails, we have some kind of type error. */ 4278 vassert(con->tag == udst->Iex.Const.con->tag); 4279 4280 if (xxx_value != my_addr) 4281 /* We didn't find either idiom. Give up. */ 4282 return NULL; 4283 4284 /* Ok, we found idiom #2.
Copy the BB, switch around the xxx and 4285 yyy values (which makes it look like idiom #1), and go into 4286 unrolling proper. This means finding (again) the last stmt, in 4287 the copied BB. */ 4288 4289 unroll_factor = calc_unroll_factor( bb0 ); 4290 if (unroll_factor < 2) 4291 return NULL; 4292 4293 bb1 = deepCopyIRSB( bb0 ); 4294 bb0 = NULL; 4295 udst = NULL; /* is now invalid */ 4296 for (i = bb1->stmts_used-1; i >= 0; i--) 4297 if (bb1->stmts[i]) 4298 break; 4299 4300 /* The next bunch of assertions should be true since we already 4301 found and checked the last stmt in the original bb. */ 4302 4303 vassert(i >= 0); 4304 4305 st = bb1->stmts[i]; 4306 vassert(st->tag == Ist_Exit); 4307 4308 con = st->Ist.Exit.dst; 4309 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 4310 4311 udst = bb1->next; 4312 vassert(udst->tag == Iex_Const); 4313 vassert(udst->Iex.Const.con->tag == Ico_U32 4314 || udst->Iex.Const.con->tag == Ico_U64); 4315 vassert(con->tag == udst->Iex.Const.con->tag); 4316 4317 /* switch the xxx and yyy fields around */ 4318 if (con->tag == Ico_U64) { 4319 udst->Iex.Const.con->Ico.U64 = xxx_value; 4320 con->Ico.U64 = yyy_value; 4321 } else { 4322 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value; 4323 con->Ico.U32 = (UInt)yyy_value; 4324 } 4325 4326 /* negate the test condition */ 4327 st->Ist.Exit.guard 4328 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard)); 4329 4330 /* --- The unroller proper. Both idioms have by --- */ 4331 /* --- now been converted to idiom 1. --- */ 4332 4333 do_unroll: 4334 4335 vassert(unroll_factor == 2 4336 || unroll_factor == 4 4337 || unroll_factor == 8); 4338 4339 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1); 4340 for (j = 1; j <= jmax; j++) { 4341 4342 n_vars = bb1->tyenv->types_used; 4343 4344 bb2 = deepCopyIRSB(bb1); 4345 for (i = 0; i < n_vars; i++) 4346 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]); 4347 4348 for (i = 0; i < bb2->stmts_used; i++) { 4349 /* deltaIRStmt destructively modifies the stmt, but 4350 that's OK since bb2 is a complete fresh copy of bb1. */ 4351 deltaIRStmt(bb2->stmts[i], n_vars); 4352 addStmtToIRSB(bb1, bb2->stmts[i]); 4353 } 4354 } 4355 4356 if (DEBUG_IROPT) { 4357 vex_printf("\nUNROLLED (%llx)\n", my_addr); 4358 ppIRSB(bb1); 4359 vex_printf("\n"); 4360 } 4361 4362 /* Flattening; sigh. The unroller succeeds in breaking flatness 4363 by negating the test condition. This should be fixed properly. 4364 For the moment use this shotgun approach. */ 4365 return flatten_BB(bb1); 4366 } 4367 4368 4369 /*---------------------------------------------------------------*/ 4370 /*--- The tree builder ---*/ 4371 /*---------------------------------------------------------------*/ 4372 4373 /* This isn't part of IR optimisation. Really it's a pass done prior 4374 to instruction selection, which improves the code that the 4375 instruction selector can produce. */ 4376 4377 /* --- The 'tmp' environment is the central data structure here --- */ 4378 4379 /* The number of outstanding bindings we're prepared to track. 4380 The number of times the env becomes full and we have to dump 4381 the oldest binding (hence reducing code quality) falls very 4382 rapidly as the env size increases. A size of 8-10 gives reasonable 4383 performance under most circumstances (A_NENV is currently 10).
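When a new binding arrives and the env is already full, the binding
   at the back (the oldest) is flushed out first as an IRStmt_WrTmp;
   see the 'emitting the oldest binding' step in ado_treebuild_BB
   below.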
*/ 4384 #define A_NENV 10 4385 4386 /* bindee == NULL === slot is not in use 4387 bindee != NULL === slot is in use 4388 */ 4389 typedef 4390 struct { 4391 IRTemp binder; 4392 IRExpr* bindee; 4393 Bool doesLoad; 4394 Bool doesGet; 4395 } 4396 ATmpInfo; 4397 4398 __attribute__((unused)) 4399 static void ppAEnv ( ATmpInfo* env ) 4400 { 4401 Int i; 4402 for (i = 0; i < A_NENV; i++) { 4403 vex_printf("%d tmp %d val ", i, (Int)env[i].binder); 4404 if (env[i].bindee) 4405 ppIRExpr(env[i].bindee); 4406 else 4407 vex_printf("(null)"); 4408 vex_printf("\n"); 4409 } 4410 } 4411 4412 /* --- Tree-traversal fns --- */ 4413 4414 /* Traverse an expr, and detect if any part of it reads memory or does 4415 a Get. Be careful ... this really controls how much the 4416 tree-builder can reorder the code, so getting it right is critical. 4417 */ 4418 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e ) 4419 { 4420 Int i; 4421 switch (e->tag) { 4422 case Iex_CCall: 4423 for (i = 0; e->Iex.CCall.args[i]; i++) 4424 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]); 4425 return; 4426 case Iex_Mux0X: 4427 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond); 4428 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0); 4429 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX); 4430 return; 4431 case Iex_Qop: 4432 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg1); 4433 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg2); 4434 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg3); 4435 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg4); 4436 return; 4437 case Iex_Triop: 4438 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg1); 4439 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg2); 4440 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg3); 4441 return; 4442 case Iex_Binop: 4443 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1); 4444 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2); 4445 return; 4446 case Iex_Unop: 4447 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg); 4448 return; 4449 case Iex_Load: 4450 *doesLoad = True; 4451 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr); 4452 return; 4453 case Iex_Get: 4454 *doesGet = True; 4455 return; 4456 case Iex_GetI: 4457 *doesGet = True; 4458 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix); 4459 return; 4460 case Iex_RdTmp: 4461 case Iex_Const: 4462 return; 4463 default: 4464 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4465 vpanic("setHints_Expr"); 4466 } 4467 } 4468 4469 4470 /* Add a binding to the front of the env and slide all the rest 4471 backwards. It should be the case that the last slot is free. */ 4472 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee ) 4473 { 4474 Int i; 4475 vassert(env[A_NENV-1].bindee == NULL); 4476 for (i = A_NENV-1; i >= 1; i--) 4477 env[i] = env[i-1]; 4478 env[0].binder = binder; 4479 env[0].bindee = bindee; 4480 env[0].doesLoad = False; /* filled in later */ 4481 env[0].doesGet = False; /* filled in later */ 4482 } 4483 4484 /* Given uses :: array of UShort, indexed by IRTemp 4485 Add the use-occurrences of temps in this expression 4486 to 'uses'.
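For example, 't3 = Add32(t1,t1)' bumps uses[t1] twice and leaves
   uses[t3] unchanged: only read occurrences (Iex_RdTmp) are counted,
   not definitions.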
4487 */ 4488 static void aoccCount_Expr ( UShort* uses, IRExpr* e ) 4489 { 4490 Int i; 4491 4492 switch (e->tag) { 4493 4494 case Iex_RdTmp: /* the only interesting case */ 4495 uses[e->Iex.RdTmp.tmp]++; 4496 return; 4497 4498 case Iex_Mux0X: 4499 aoccCount_Expr(uses, e->Iex.Mux0X.cond); 4500 aoccCount_Expr(uses, e->Iex.Mux0X.expr0); 4501 aoccCount_Expr(uses, e->Iex.Mux0X.exprX); 4502 return; 4503 4504 case Iex_Qop: 4505 aoccCount_Expr(uses, e->Iex.Qop.details->arg1); 4506 aoccCount_Expr(uses, e->Iex.Qop.details->arg2); 4507 aoccCount_Expr(uses, e->Iex.Qop.details->arg3); 4508 aoccCount_Expr(uses, e->Iex.Qop.details->arg4); 4509 return; 4510 4511 case Iex_Triop: 4512 aoccCount_Expr(uses, e->Iex.Triop.details->arg1); 4513 aoccCount_Expr(uses, e->Iex.Triop.details->arg2); 4514 aoccCount_Expr(uses, e->Iex.Triop.details->arg3); 4515 return; 4516 4517 case Iex_Binop: 4518 aoccCount_Expr(uses, e->Iex.Binop.arg1); 4519 aoccCount_Expr(uses, e->Iex.Binop.arg2); 4520 return; 4521 4522 case Iex_Unop: 4523 aoccCount_Expr(uses, e->Iex.Unop.arg); 4524 return; 4525 4526 case Iex_Load: 4527 aoccCount_Expr(uses, e->Iex.Load.addr); 4528 return; 4529 4530 case Iex_CCall: 4531 for (i = 0; e->Iex.CCall.args[i]; i++) 4532 aoccCount_Expr(uses, e->Iex.CCall.args[i]); 4533 return; 4534 4535 case Iex_GetI: 4536 aoccCount_Expr(uses, e->Iex.GetI.ix); 4537 return; 4538 4539 case Iex_Const: 4540 case Iex_Get: 4541 return; 4542 4543 default: 4544 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4545 vpanic("aoccCount_Expr"); 4546 } 4547 } 4548 4549 4550 /* Given uses :: array of UShort, indexed by IRTemp 4551 Add the use-occurrences of temps in this statement 4552 to the env. 4553 */ 4554 static void aoccCount_Stmt ( UShort* uses, IRStmt* st ) 4555 { 4556 Int i; 4557 IRDirty* d; 4558 IRCAS* cas; 4559 switch (st->tag) { 4560 case Ist_AbiHint: 4561 aoccCount_Expr(uses, st->Ist.AbiHint.base); 4562 aoccCount_Expr(uses, st->Ist.AbiHint.nia); 4563 return; 4564 case Ist_WrTmp: 4565 aoccCount_Expr(uses, st->Ist.WrTmp.data); 4566 return; 4567 case Ist_Put: 4568 aoccCount_Expr(uses, st->Ist.Put.data); 4569 return; 4570 case Ist_PutI: 4571 aoccCount_Expr(uses, st->Ist.PutI.details->ix); 4572 aoccCount_Expr(uses, st->Ist.PutI.details->data); 4573 return; 4574 case Ist_Store: 4575 aoccCount_Expr(uses, st->Ist.Store.addr); 4576 aoccCount_Expr(uses, st->Ist.Store.data); 4577 return; 4578 case Ist_CAS: 4579 cas = st->Ist.CAS.details; 4580 aoccCount_Expr(uses, cas->addr); 4581 if (cas->expdHi) 4582 aoccCount_Expr(uses, cas->expdHi); 4583 aoccCount_Expr(uses, cas->expdLo); 4584 if (cas->dataHi) 4585 aoccCount_Expr(uses, cas->dataHi); 4586 aoccCount_Expr(uses, cas->dataLo); 4587 return; 4588 case Ist_LLSC: 4589 aoccCount_Expr(uses, st->Ist.LLSC.addr); 4590 if (st->Ist.LLSC.storedata) 4591 aoccCount_Expr(uses, st->Ist.LLSC.storedata); 4592 return; 4593 case Ist_Dirty: 4594 d = st->Ist.Dirty.details; 4595 if (d->mFx != Ifx_None) 4596 aoccCount_Expr(uses, d->mAddr); 4597 aoccCount_Expr(uses, d->guard); 4598 for (i = 0; d->args[i]; i++) 4599 aoccCount_Expr(uses, d->args[i]); 4600 return; 4601 case Ist_NoOp: 4602 case Ist_IMark: 4603 case Ist_MBE: 4604 return; 4605 case Ist_Exit: 4606 aoccCount_Expr(uses, st->Ist.Exit.guard); 4607 return; 4608 default: 4609 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 4610 vpanic("aoccCount_Stmt"); 4611 } 4612 } 4613 4614 /* Look up a binding for tmp in the env. If found, return the bound 4615 expression, and set the env's binding to NULL so it is marked as 4616 used. If not found, return NULL. 
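For example, if the env binds t7 -> LDle:I32(t2), the first lookup
   of t7 returns that load and clears the slot; a second lookup of t7
   returns NULL. This single-shot discipline stops a bound expression
   (here, a load) from being duplicated into several use sites.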
*/ 4617 4618 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp ) 4619 { 4620 Int i; 4621 for (i = 0; i < A_NENV; i++) { 4622 if (env[i].binder == tmp && env[i].bindee != NULL) { 4623 IRExpr* bindee = env[i].bindee; 4624 env[i].bindee = NULL; 4625 return bindee; 4626 } 4627 } 4628 return NULL; 4629 } 4630 4631 /* Traverse e, looking for temps. For each observed temp, see if env 4632 contains a binding for the temp, and if so return the bound value. 4633 The env has the property that any binding it holds is 4634 'single-shot', so once a binding is used, it is marked as no longer 4635 available, by setting its .bindee field to NULL. */ 4636 4637 static inline Bool is_Unop ( IRExpr* e, IROp op ) { 4638 return e->tag == Iex_Unop && e->Iex.Unop.op == op; 4639 } 4640 static inline Bool is_Binop ( IRExpr* e, IROp op ) { 4641 return e->tag == Iex_Binop && e->Iex.Binop.op == op; 4642 } 4643 4644 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 4645 { 4646 switch (op) { 4647 case Iop_Or32: 4648 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ 4649 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) 4650 return IRExpr_Unop( Iop_CmpwNEZ32, 4651 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 4652 a2->Iex.Unop.arg ) ); 4653 break; 4654 4655 case Iop_CmpNE32: 4656 /* Since X has type Ity_I1 we can simplify: 4657 CmpNE32(1Uto32(X),0)) ==> X */ 4658 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2)) 4659 return a1->Iex.Unop.arg; 4660 break; 4661 4662 default: 4663 break; 4664 } 4665 /* no reduction rule applies */ 4666 return IRExpr_Binop( op, a1, a2 ); 4667 } 4668 4669 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) 4670 { 4671 switch (op) { 4672 case Iop_CmpwNEZ64: 4673 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 4674 if (is_Binop(aa, Iop_Or64) 4675 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) 4676 return fold_IRExpr_Unop( 4677 Iop_CmpwNEZ64, 4678 IRExpr_Binop(Iop_Or64, 4679 aa->Iex.Binop.arg1->Iex.Unop.arg, 4680 aa->Iex.Binop.arg2)); 4681 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 4682 if (is_Binop(aa, Iop_Or64) 4683 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) 4684 return fold_IRExpr_Unop( 4685 Iop_CmpwNEZ64, 4686 IRExpr_Binop(Iop_Or64, 4687 aa->Iex.Binop.arg1, 4688 aa->Iex.Binop.arg2->Iex.Unop.arg)); 4689 break; 4690 case Iop_CmpNEZ64: 4691 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ 4692 if (is_Unop(aa, Iop_Left64)) 4693 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); 4694 break; 4695 case Iop_CmpwNEZ32: 4696 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ 4697 if (is_Unop(aa, Iop_CmpwNEZ32)) 4698 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); 4699 break; 4700 case Iop_CmpNEZ32: 4701 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ 4702 if (is_Unop(aa, Iop_Left32)) 4703 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); 4704 /* CmpNEZ32( 1Uto32(X) ) --> X */ 4705 if (is_Unop(aa, Iop_1Uto32)) 4706 return aa->Iex.Unop.arg; 4707 break; 4708 case Iop_CmpNEZ8: 4709 /* CmpNEZ8( 1Uto8(X) ) --> X */ 4710 if (is_Unop(aa, Iop_1Uto8)) 4711 return aa->Iex.Unop.arg; 4712 break; 4713 case Iop_Left32: 4714 /* Left32( Left32(x) ) --> Left32(x) */ 4715 if (is_Unop(aa, Iop_Left32)) 4716 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); 4717 break; 4718 case Iop_32to1: 4719 /* 32to1( 1Uto32 ( x ) ) --> x */ 4720 if (is_Unop(aa, Iop_1Uto32)) 4721 return aa->Iex.Unop.arg; 4722 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ 4723 if (is_Unop(aa, Iop_CmpwNEZ32)) 4724 return IRExpr_Unop( 
Iop_CmpNEZ32, aa->Iex.Unop.arg ); 4725 break; 4726 case Iop_64to1: 4727 /* 64to1( 1Uto64 ( x ) ) --> x */ 4728 if (is_Unop(aa, Iop_1Uto64)) 4729 return aa->Iex.Unop.arg; 4730 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ 4731 if (is_Unop(aa, Iop_CmpwNEZ64)) 4732 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); 4733 break; 4734 case Iop_64to32: 4735 /* 64to32( 32Uto64 ( x )) --> x */ 4736 if (is_Unop(aa, Iop_32Uto64)) 4737 return aa->Iex.Unop.arg; 4738 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */ 4739 if (is_Unop(aa, Iop_8Uto64)) 4740 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg); 4741 break; 4742 4743 case Iop_32Uto64: 4744 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */ 4745 if (is_Unop(aa, Iop_8Uto32)) 4746 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg); 4747 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */ 4748 if (is_Unop(aa, Iop_16Uto32)) 4749 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg); 4750 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )) 4751 --> Shr64( 32Uto64(64to32(x)), sh )) */ 4752 if (is_Unop(aa, Iop_64to32) 4753 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64) 4754 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 4755 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 4756 Iop_64to32)) { 4757 return aa->Iex.Unop.arg; 4758 } 4759 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )) 4760 --> 32Uto64(64to32( Shl64( x, sh )) */ 4761 if (is_Unop(aa, Iop_64to32) 4762 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64) 4763 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 4764 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 4765 Iop_64to32)) { 4766 return 4767 IRExpr_Unop( 4768 Iop_32Uto64, 4769 IRExpr_Unop( 4770 Iop_64to32, 4771 IRExpr_Binop( 4772 Iop_Shl64, 4773 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg, 4774 aa->Iex.Unop.arg->Iex.Binop.arg2 4775 ))); 4776 } 4777 break; 4778 4779 case Iop_1Sto32: 4780 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */ 4781 if (is_Unop(aa, Iop_CmpNEZ8) 4782 && is_Unop(aa->Iex.Unop.arg, Iop_32to8) 4783 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32) 4784 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg, 4785 Iop_CmpNEZ32)) { 4786 return IRExpr_Unop( Iop_CmpwNEZ32, 4787 aa->Iex.Unop.arg->Iex.Unop.arg 4788 ->Iex.Unop.arg->Iex.Unop.arg); 4789 } 4790 break; 4791 4792 default: 4793 break; 4794 } 4795 /* no reduction rule applies */ 4796 return IRExpr_Unop( op, aa ); 4797 } 4798 4799 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) 4800 { 4801 IRExpr* e2; 4802 IRExpr** args2; 4803 Int i; 4804 4805 switch (e->tag) { 4806 4807 case Iex_CCall: 4808 args2 = shallowCopyIRExprVec(e->Iex.CCall.args); 4809 for (i = 0; args2[i]; i++) 4810 args2[i] = atbSubst_Expr(env,args2[i]); 4811 return IRExpr_CCall( 4812 e->Iex.CCall.cee, 4813 e->Iex.CCall.retty, 4814 args2 4815 ); 4816 case Iex_RdTmp: 4817 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp); 4818 return e2 ? 
e2 : e; 4819 case Iex_Mux0X: 4820 return IRExpr_Mux0X( 4821 atbSubst_Expr(env, e->Iex.Mux0X.cond), 4822 atbSubst_Expr(env, e->Iex.Mux0X.expr0), 4823 atbSubst_Expr(env, e->Iex.Mux0X.exprX) 4824 ); 4825 case Iex_Qop: 4826 return IRExpr_Qop( 4827 e->Iex.Qop.details->op, 4828 atbSubst_Expr(env, e->Iex.Qop.details->arg1), 4829 atbSubst_Expr(env, e->Iex.Qop.details->arg2), 4830 atbSubst_Expr(env, e->Iex.Qop.details->arg3), 4831 atbSubst_Expr(env, e->Iex.Qop.details->arg4) 4832 ); 4833 case Iex_Triop: 4834 return IRExpr_Triop( 4835 e->Iex.Triop.details->op, 4836 atbSubst_Expr(env, e->Iex.Triop.details->arg1), 4837 atbSubst_Expr(env, e->Iex.Triop.details->arg2), 4838 atbSubst_Expr(env, e->Iex.Triop.details->arg3) 4839 ); 4840 case Iex_Binop: 4841 return fold_IRExpr_Binop( 4842 e->Iex.Binop.op, 4843 atbSubst_Expr(env, e->Iex.Binop.arg1), 4844 atbSubst_Expr(env, e->Iex.Binop.arg2) 4845 ); 4846 case Iex_Unop: 4847 return fold_IRExpr_Unop( 4848 e->Iex.Unop.op, 4849 atbSubst_Expr(env, e->Iex.Unop.arg) 4850 ); 4851 case Iex_Load: 4852 return IRExpr_Load( 4853 e->Iex.Load.end, 4854 e->Iex.Load.ty, 4855 atbSubst_Expr(env, e->Iex.Load.addr) 4856 ); 4857 case Iex_GetI: 4858 return IRExpr_GetI( 4859 e->Iex.GetI.descr, 4860 atbSubst_Expr(env, e->Iex.GetI.ix), 4861 e->Iex.GetI.bias 4862 ); 4863 case Iex_Const: 4864 case Iex_Get: 4865 return e; 4866 default: 4867 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4868 vpanic("atbSubst_Expr"); 4869 } 4870 } 4871 4872 /* Same deal as atbSubst_Expr, except for stmts. */ 4873 4874 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) 4875 { 4876 Int i; 4877 IRDirty *d, *d2; 4878 IRCAS *cas, *cas2; 4879 IRPutI *puti, *puti2; 4880 4881 switch (st->tag) { 4882 case Ist_AbiHint: 4883 return IRStmt_AbiHint( 4884 atbSubst_Expr(env, st->Ist.AbiHint.base), 4885 st->Ist.AbiHint.len, 4886 atbSubst_Expr(env, st->Ist.AbiHint.nia) 4887 ); 4888 case Ist_Store: 4889 return IRStmt_Store( 4890 st->Ist.Store.end, 4891 atbSubst_Expr(env, st->Ist.Store.addr), 4892 atbSubst_Expr(env, st->Ist.Store.data) 4893 ); 4894 case Ist_WrTmp: 4895 return IRStmt_WrTmp( 4896 st->Ist.WrTmp.tmp, 4897 atbSubst_Expr(env, st->Ist.WrTmp.data) 4898 ); 4899 case Ist_Put: 4900 return IRStmt_Put( 4901 st->Ist.Put.offset, 4902 atbSubst_Expr(env, st->Ist.Put.data) 4903 ); 4904 case Ist_PutI: 4905 puti = st->Ist.PutI.details; 4906 puti2 = mkIRPutI(puti->descr, 4907 atbSubst_Expr(env, puti->ix), 4908 puti->bias, 4909 atbSubst_Expr(env, puti->data)); 4910 return IRStmt_PutI(puti2); 4911 4912 case Ist_Exit: 4913 return IRStmt_Exit( 4914 atbSubst_Expr(env, st->Ist.Exit.guard), 4915 st->Ist.Exit.jk, 4916 st->Ist.Exit.dst, 4917 st->Ist.Exit.offsIP 4918 ); 4919 case Ist_IMark: 4920 return IRStmt_IMark(st->Ist.IMark.addr, 4921 st->Ist.IMark.len, 4922 st->Ist.IMark.delta); 4923 case Ist_NoOp: 4924 return IRStmt_NoOp(); 4925 case Ist_MBE: 4926 return IRStmt_MBE(st->Ist.MBE.event); 4927 case Ist_CAS: 4928 cas = st->Ist.CAS.details; 4929 cas2 = mkIRCAS( 4930 cas->oldHi, cas->oldLo, cas->end, 4931 atbSubst_Expr(env, cas->addr), 4932 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL, 4933 atbSubst_Expr(env, cas->expdLo), 4934 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL, 4935 atbSubst_Expr(env, cas->dataLo) 4936 ); 4937 return IRStmt_CAS(cas2); 4938 case Ist_LLSC: 4939 return IRStmt_LLSC( 4940 st->Ist.LLSC.end, 4941 st->Ist.LLSC.result, 4942 atbSubst_Expr(env, st->Ist.LLSC.addr), 4943 st->Ist.LLSC.storedata 4944 ? 
atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL 4945 ); 4946 case Ist_Dirty: 4947 d = st->Ist.Dirty.details; 4948 d2 = emptyIRDirty(); 4949 *d2 = *d; 4950 if (d2->mFx != Ifx_None) 4951 d2->mAddr = atbSubst_Expr(env, d2->mAddr); 4952 d2->guard = atbSubst_Expr(env, d2->guard); 4953 for (i = 0; d2->args[i]; i++) 4954 d2->args[i] = atbSubst_Expr(env, d2->args[i]); 4955 return IRStmt_Dirty(d2); 4956 default: 4957 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 4958 vpanic("atbSubst_Stmt"); 4959 } 4960 } 4961 4962 /* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb ) 4963 { 4964 Int i, j, k, m; 4965 Bool stmtPuts, stmtStores, invalidateMe; 4966 IRStmt* st; 4967 IRStmt* st2; 4968 ATmpInfo env[A_NENV]; 4969 4970 Bool max_ga_known = False; 4971 Addr64 max_ga = 0; 4972 4973 Int n_tmps = bb->tyenv->types_used; 4974 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort)); 4975 4976 /* Phase 1. Scan forwards in bb, counting use occurrences of each 4977 temp. Also count occurrences in the bb->next field. Take the 4978 opportunity to also find the maximum guest address in the block, 4979 since that will be needed later for deciding when we can safely 4980 elide event checks. */ 4981 4982 for (i = 0; i < n_tmps; i++) 4983 uses[i] = 0; 4984 4985 for (i = 0; i < bb->stmts_used; i++) { 4986 st = bb->stmts[i]; 4987 switch (st->tag) { 4988 case Ist_NoOp: 4989 continue; 4990 case Ist_IMark: { 4991 Int len = st->Ist.IMark.len; 4992 Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1; 4993 max_ga_known = True; 4994 if (mga > max_ga) 4995 max_ga = mga; 4996 break; 4997 } 4998 default: 4999 break; 5000 } 5001 aoccCount_Stmt( uses, st ); 5002 } 5003 aoccCount_Expr(uses, bb->next ); 5004 5005 # if 0 5006 for (i = 0; i < n_tmps; i++) { 5007 if (uses[i] == 0) 5008 continue; 5009 ppIRTemp( (IRTemp)i ); 5010 vex_printf(" used %d\n", (Int)uses[i] ); 5011 } 5012 # endif 5013 5014 /* Phase 2. Scan forwards in bb. For each statement in turn: 5015 5016 If the env is full, emit the element at the back (the oldest 5017 binding). This guarantees there is at least one free slot in 5018 what follows. 5019 On seeing 't = E', occ(t)==1, 5020 let E'=env(E) 5021 delete this stmt 5022 add t -> E' to the front of the env 5023 Examine E' and set the hints for E' appropriately 5024 (doesLoad? doesGet?) 5025 5026 On seeing any other stmt, 5027 let stmt' = env(stmt) 5028 remove from env any 't=E' binds invalidated by stmt 5029 emit the invalidated stmts 5030 emit stmt' 5031 compact any holes in env 5032 by sliding entries towards the front 5033 5034 Finally, apply env to bb->next. 5035 */ 5036 5037 for (i = 0; i < A_NENV; i++) { 5038 env[i].bindee = NULL; 5039 env[i].binder = IRTemp_INVALID; 5040 } 5041 5042 /* The stmts in bb are being reordered, and we are guaranteed to 5043 end up with no more than the number we started with. Use i as 5044 the cursor for the stmt currently being examined, and j <= i as 5045 the cursor for the stmt currently being written. 5046 */ 5047 j = 0; 5048 for (i = 0; i < bb->stmts_used; i++) { 5049 5050 st = bb->stmts[i]; 5051 if (st->tag == Ist_NoOp) 5052 continue; 5053 5054 /* Ensure there's at least one space in the env, by emitting 5055 the oldest binding if necessary. */ 5056 if (env[A_NENV-1].bindee != NULL) { 5057 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder, 5058 env[A_NENV-1].bindee ); 5059 j++; 5060 vassert(j <= i); 5061 env[A_NENV-1].bindee = NULL; 5062 } 5063 5064 /* Consider current stmt.
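For example (illustrative): on seeing 't5 = Add32(t3,t4)' with
         uses[t5] == 1, the stmt is deleted and t5 -> Add32(t3,t4) is
         pushed onto the front of the env; if a later 'PUT(0) = t5' is
         reached before the binding is invalidated, it is emitted as
         'PUT(0) = Add32(t3,t4)'.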
*/ 5065 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) { 5066 IRExpr *e, *e2; 5067 5068 /* optional extra: dump dead bindings as we find them. 5069 Removes the need for a prior dead-code removal pass. */ 5070 if (uses[st->Ist.WrTmp.tmp] == 0) { 5071 if (0) vex_printf("DEAD binding\n"); 5072 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5073 } 5074 vassert(uses[st->Ist.WrTmp.tmp] == 1); 5075 5076 /* ok, we have 't = E', occ(t)==1. Do the abovementioned 5077 actions. */ 5078 e = st->Ist.WrTmp.data; 5079 e2 = atbSubst_Expr(env, e); 5080 addToEnvFront(env, st->Ist.WrTmp.tmp, e2); 5081 setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2); 5082 /* don't advance j, as we are deleting this stmt and instead 5083 holding it temporarily in the env. */ 5084 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5085 } 5086 5087 /* we get here for any other kind of statement. */ 5088 /* 'use up' any bindings required by the current statement. */ 5089 st2 = atbSubst_Stmt(env, st); 5090 5091 /* Now, before this stmt, dump any bindings in env that it 5092 invalidates. These need to be dumped in the order in which 5093 they originally entered env -- that means from oldest to 5094 youngest. */ 5095 5096 /* stmtPuts/stmtStores characterise what the stmt under 5097 consideration does, or might do (the safe answer being True). */ 5098 stmtPuts 5099 = toBool( st->tag == Ist_Put 5100 || st->tag == Ist_PutI 5101 || st->tag == Ist_Dirty ); 5102 5103 /* stmtStores is True if this stmt writes memory or might do so 5104 (==> we don't want to reorder other loads or stores relative 5105 to it). Also, both LL and SC fall under this classification, 5106 since we really ought to be conservative and not reorder any 5107 other memory transactions relative to them. */ 5108 stmtStores 5109 = toBool( st->tag == Ist_Store 5110 || st->tag == Ist_Dirty 5111 || st->tag == Ist_LLSC 5112 || st->tag == Ist_CAS ); 5113 5114 for (k = A_NENV-1; k >= 0; k--) { 5115 if (env[k].bindee == NULL) 5116 continue; 5117 /* Compare the actions of this stmt with the actions of 5118 binding 'k', to see if they invalidate the binding. */ 5119 invalidateMe 5120 = toBool( 5121 /* a store invalidates loaded data */ 5122 (env[k].doesLoad && stmtStores) 5123 /* a put invalidates get'd data */ 5124 || (env[k].doesGet && stmtPuts) 5125 /* a put invalidates loaded data. Note, we could do 5126 much better here in the sense that we only need to 5127 invalidate trees containing loads if the Put in 5128 question is marked as requiring precise 5129 exceptions. */ 5130 || (env[k].doesLoad && stmtPuts) 5131 /* probably overly conservative: a memory bus event 5132 invalidates absolutely everything, so that all 5133 computation prior to it is forced to complete before 5134 proceeding with the event (fence,lock,unlock).
*/ 5135 || st->tag == Ist_MBE 5136 /* also be (probably overly) paranoid re AbiHints */ 5137 || st->tag == Ist_AbiHint 5138 ); 5139 if (invalidateMe) { 5140 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee ); 5141 j++; 5142 vassert(j <= i); 5143 env[k].bindee = NULL; 5144 } 5145 } 5146 5147 /* Slide in-use entries in env up to the front */ 5148 m = 0; 5149 for (k = 0; k < A_NENV; k++) { 5150 if (env[k].bindee != NULL) { 5151 env[m] = env[k]; 5152 m++; 5153 } 5154 } 5155 for (; m < A_NENV; m++) { 5156 env[m].bindee = NULL; 5157 } 5158 5159 /* finally, emit the substituted statement */ 5160 bb->stmts[j] = st2; 5161 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */ 5162 j++; 5163 5164 vassert(j <= i+1); 5165 } /* for each stmt in the original bb ... */ 5166 5167 /* Finally ... substitute the ->next field as much as possible, and 5168 dump any left-over bindings. Hmm. Perhaps there should be no 5169 left-over bindings? Or are any left-over bindings 5170 by definition dead? */ 5171 bb->next = atbSubst_Expr(env, bb->next); 5172 bb->stmts_used = j; 5173 5174 return max_ga_known ? max_ga : ~(Addr64)0; 5175 } 5176 5177 5178 /*---------------------------------------------------------------*/ 5179 /*--- iropt main ---*/ 5180 /*---------------------------------------------------------------*/ 5181 5182 static Bool iropt_verbose = False; /* True; */ 5183 5184 5185 /* Do a simple cleanup pass on bb. This is: redundant Get removal, 5186 redundant Put removal, constant propagation, dead code removal, 5187 clean helper specialisation, and dead code removal (again). 5188 */ 5189 5190 5191 static 5192 IRSB* cheap_transformations ( 5193 IRSB* bb, 5194 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int), 5195 Bool (*preciseMemExnsFn)(Int,Int) 5196 ) 5197 { 5198 redundant_get_removal_BB ( bb ); 5199 if (iropt_verbose) { 5200 vex_printf("\n========= REDUNDANT GET\n\n" ); 5201 ppIRSB(bb); 5202 } 5203 5204 if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) { 5205 redundant_put_removal_BB ( bb, preciseMemExnsFn ); 5206 } 5207 if (iropt_verbose) { 5208 vex_printf("\n========= REDUNDANT PUT\n\n" ); 5209 ppIRSB(bb); 5210 } 5211 5212 bb = cprop_BB ( bb ); 5213 if (iropt_verbose) { 5214 vex_printf("\n========= CPROPD\n\n" ); 5215 ppIRSB(bb); 5216 } 5217 5218 do_deadcode_BB ( bb ); 5219 if (iropt_verbose) { 5220 vex_printf("\n========= DEAD\n\n" ); 5221 ppIRSB(bb); 5222 } 5223 5224 bb = spec_helpers_BB ( bb, specHelper ); 5225 do_deadcode_BB ( bb ); 5226 if (iropt_verbose) { 5227 vex_printf("\n========= SPECd \n\n" ); 5228 ppIRSB(bb); 5229 } 5230 5231 return bb; 5232 } 5233 5234 5235 /* Do some more expensive transformations on bb, which are aimed at 5236 optimising as much as possible in the presence of GetI and PutI.
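Illustratively (a sketch of the intent, not a spec of
   collapse_AddSub_chains_BB): rewriting Add32(Add32(t7,0x8:I32),0x8:I32)
   as Add32(t7,0x10:I32) canonicalises GetI/PutI index expressions,
   which helps the aliasing tests above prove ExactAlias or NoAlias,
   and lets CSE common up equal indexes.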
*/ 5237 5238 static 5239 IRSB* expensive_transformations( IRSB* bb ) 5240 { 5241 (void)do_cse_BB( bb ); 5242 collapse_AddSub_chains_BB( bb ); 5243 do_redundant_GetI_elimination( bb ); 5244 if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) { 5245 do_redundant_PutI_elimination( bb ); 5246 } 5247 do_deadcode_BB( bb ); 5248 return bb; 5249 } 5250 5251 5252 /* Scan a flattened BB to look for signs that more expensive 5253 optimisations might be useful: 5254 - find out if there are any GetIs and PutIs 5255 - find out if there are any floating or vector-typed temporaries 5256 */ 5257 5258 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI, 5259 /*OUT*/Bool* hasVorFtemps, 5260 IRSB* bb ) 5261 { 5262 Int i, j; 5263 IRStmt* st; 5264 IRDirty* d; 5265 IRCAS* cas; 5266 5267 *hasGetIorPutI = False; 5268 *hasVorFtemps = False; 5269 5270 for (i = 0; i < bb->stmts_used; i++) { 5271 st = bb->stmts[i]; 5272 switch (st->tag) { 5273 case Ist_AbiHint: 5274 vassert(isIRAtom(st->Ist.AbiHint.base)); 5275 vassert(isIRAtom(st->Ist.AbiHint.nia)); 5276 break; 5277 case Ist_PutI: 5278 *hasGetIorPutI = True; 5279 break; 5280 case Ist_WrTmp: 5281 if (st->Ist.WrTmp.data->tag == Iex_GetI) 5282 *hasGetIorPutI = True; 5283 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) { 5284 case Ity_I1: case Ity_I8: case Ity_I16: 5285 case Ity_I32: case Ity_I64: case Ity_I128: 5286 break; 5287 case Ity_F32: case Ity_F64: case Ity_F128: 5288 case Ity_V128: case Ity_V256: 5289 *hasVorFtemps = True; 5290 break; 5291 case Ity_D32: case Ity_D64: case Ity_D128: 5292 *hasVorFtemps = True; 5293 break; 5294 default: 5295 goto bad; 5296 } 5297 break; 5298 case Ist_Put: 5299 vassert(isIRAtom(st->Ist.Put.data)); 5300 break; 5301 case Ist_Store: 5302 vassert(isIRAtom(st->Ist.Store.addr)); 5303 vassert(isIRAtom(st->Ist.Store.data)); 5304 break; 5305 case Ist_CAS: 5306 cas = st->Ist.CAS.details; 5307 vassert(isIRAtom(cas->addr)); 5308 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 5309 vassert(isIRAtom(cas->expdLo)); 5310 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 5311 vassert(isIRAtom(cas->dataLo)); 5312 break; 5313 case Ist_LLSC: 5314 vassert(isIRAtom(st->Ist.LLSC.addr)); 5315 if (st->Ist.LLSC.storedata) 5316 vassert(isIRAtom(st->Ist.LLSC.storedata)); 5317 break; 5318 case Ist_Dirty: 5319 d = st->Ist.Dirty.details; 5320 vassert(isIRAtom(d->guard)); 5321 for (j = 0; d->args[j]; j++) 5322 vassert(isIRAtom(d->args[j])); 5323 if (d->mFx != Ifx_None) 5324 vassert(isIRAtom(d->mAddr)); 5325 break; 5326 case Ist_NoOp: 5327 case Ist_IMark: 5328 case Ist_MBE: 5329 break; 5330 case Ist_Exit: 5331 vassert(isIRAtom(st->Ist.Exit.guard)); 5332 break; 5333 default: 5334 bad: 5335 ppIRStmt(st); 5336 vpanic("considerExpensives"); 5337 } 5338 } 5339 } 5340 5341 5342 /* ---------------- The main iropt entry point. ---------------- */ 5343 5344 /* exported from this file */ 5345 /* Rules of the game: 5346 5347 - IRExpr/IRStmt trees should be treated as immutable, as they 5348 may get shared. So never change a field of such a tree node; 5349 instead construct and return a new one if needed. 
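For example, to change an Exit's guard, build a new stmt with
     IRStmt_Exit(newGuard, st->Ist.Exit.jk, st->Ist.Exit.dst,
     st->Ist.Exit.offsIP) rather than assigning to st->Ist.Exit.guard.
     (The loop unroller above is the deliberate exception: it mutates
     only fresh deep copies.)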
5350 */ 5351 5352 5353 IRSB* do_iropt_BB( 5354 IRSB* bb0, 5355 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int), 5356 Bool (*preciseMemExnsFn)(Int,Int), 5357 Addr64 guest_addr, 5358 VexArch guest_arch 5359 ) 5360 { 5361 static Int n_total = 0; 5362 static Int n_expensive = 0; 5363 5364 Bool hasGetIorPutI, hasVorFtemps; 5365 IRSB *bb, *bb2; 5366 5367 n_total++; 5368 5369 /* First flatten the block out, since all other 5370 phases assume flat code. */ 5371 5372 bb = flatten_BB ( bb0 ); 5373 5374 if (iropt_verbose) { 5375 vex_printf("\n========= FLAT\n\n" ); 5376 ppIRSB(bb); 5377 } 5378 5379 /* If at level 0, stop now. */ 5380 if (vex_control.iropt_level <= 0) return bb; 5381 5382 /* Now do a preliminary cleanup pass, and figure out if we also 5383 need to do 'expensive' optimisations. Expensive optimisations 5384 are deemed necessary if the block contains any GetIs or PutIs. 5385 If needed, do expensive transformations and then another cheap 5386 cleanup pass. */ 5387 5388 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn ); 5389 5390 if (guest_arch == VexArchARM) { 5391 /* Translating Thumb2 code produces a lot of chaff. We have to 5392 work extra hard to get rid of it. */ 5393 bb = cprop_BB(bb); 5394 bb = spec_helpers_BB ( bb, specHelper ); 5395 if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) { 5396 redundant_put_removal_BB ( bb, preciseMemExnsFn ); 5397 } 5398 do_cse_BB( bb ); 5399 do_deadcode_BB( bb ); 5400 } 5401 5402 if (vex_control.iropt_level > 1) { 5403 5404 /* Peer at what we have, to decide how much more effort to throw 5405 at it. */ 5406 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb ); 5407 5408 if (hasVorFtemps && !hasGetIorPutI) { 5409 /* If any evidence of FP or Vector activity, CSE, as that 5410 tends to mop up all manner of lardy code to do with 5411 rounding modes. Don't bother if hasGetIorPutI since that 5412 case leads into the expensive transformations, which do 5413 CSE anyway. */ 5414 (void)do_cse_BB( bb ); 5415 do_deadcode_BB( bb ); 5416 } 5417 5418 if (hasGetIorPutI) { 5419 Bool cses; 5420 n_expensive++; 5421 if (DEBUG_IROPT) 5422 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive); 5423 bb = expensive_transformations( bb ); 5424 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn ); 5425 /* Potentially common up GetIs */ 5426 cses = do_cse_BB( bb ); 5427 if (cses) 5428 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn ); 5429 } 5430 5431 /* Now have a go at unrolling simple (single-BB) loops. If 5432 successful, clean up the results as much as possible. */ 5433 5434 bb2 = maybe_loop_unroll_BB( bb, guest_addr ); 5435 if (bb2) { 5436 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn ); 5437 if (hasGetIorPutI) { 5438 bb = expensive_transformations( bb ); 5439 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn ); 5440 } else { 5441 /* at least do CSE and dead code removal */ 5442 do_cse_BB( bb ); 5443 do_deadcode_BB( bb ); 5444 } 5445 if (0) vex_printf("vex iropt: unrolled a loop\n"); 5446 } 5447 5448 } 5449 5450 return bb; 5451 } 5452 5453 5454 /*---------------------------------------------------------------*/ 5455 /*--- end ir_opt.c ---*/ 5456 /*---------------------------------------------------------------*/ 5457