/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for
   sameIRExprs. */
#define STATS_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper calls).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as VexRegUpdUnwindregsAtMemAccess, but all the guest state
     is up to date at memory exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore in a sense wrong.  Specifically, PutIs are
   assumed not to write parts of the guest state that we need to have
   up-to-date at loads/stores.  So far on the x86 guest that has not
   mattered, since indeed only the x87 FP registers and tags are
   accessed using GetI/PutI, and there is no need so far for them to
   be up to date at mem exception points.  The rPutI pass should be
   fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   TODO: CSE up F64 literals (already doing F64is).

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive.  */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
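
/* Illustrative only: a minimal sketch of how the passes below use
   this map.  Not compiled; the bindings shown (key 0x10, values 0x99
   and 0xAA) are made-up, not anything the optimiser actually
   stores. */
#if 0
static void exampleHHWUsage ( void )
{
   HashHW* h = newHHW();
   HWord   v = 0;
   addToHHW(h, 0x10, 0x99);        /* bind 0x10 -> 0x99 */
   addToHHW(h, 0x10, 0xAA);        /* rebinding replaces the old value */
   if (lookupHHW(h, &v, 0x10))     /* succeeds; v is now 0xAA */
      vassert(v == 0xAA);
   vassert(!lookupHHW(h, NULL, 0x20));  /* unbound key: not found */
}
#endif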

/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
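
/* Worked example (illustrative; the temp numbers are arbitrary).
   Calling flatten_Expr on the non-atomic expression

      Add32(GET:I32(0), LDle:I32(t9))

   appends

      t1 = GET:I32(0)
      t2 = LDle:I32(t9)
      t3 = Add32(t1,t2)

   to the block and returns t3.  Every operator now takes only atoms
   (temps or constants) as arguments, which is what the passes below
   rely on. */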

/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d, *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1, e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas = st->Ist.CAS.details;
         e1 = flatten_Expr(bb, cas->addr);
         e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3 = flatten_Expr(bb, cas->expdLo);
         e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5 = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove from the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
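
/* Worked example: for a Put/Get of an I32 at guest state offset 8,
   minoff = 8 and maxoff = 8 + 4 - 1 = 11, so the key is
   (8 << 16) | 11 = 0x0008000B.  Packing both bounds into one UInt
   lets the overlap test below recover them with simple shifts and
   masks. */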

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
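
/* For instance (made-up offsets): if the env holds entries for
   (0 .. 3) and (8 .. 11), then invalidateOverlaps(h, 2, 9) kills
   both, since each range shares at least one byte with (2 .. 9),
   whereas invalidateOverlaps(h, 4, 7) kills neither. */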

static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                      (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
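
/* Before/after sketch (illustrative temp numbers).  Given

      t1 = GET:I32(0)
      t2 = GET:I32(0)
      PUT(0) = t3
      t4 = GET:I32(0)

   the pass rewrites 't2 = GET:I32(0)' to 't2 = t1' (the env binds
   offsets 0..3 to t1), and rewrites 't4 = GET:I32(0)' to 't4 = t3':
   the intervening Put both invalidated the old binding and installed
   its own data as the current value of offsets 0..3. */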

/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all
         parts of the environment corresponding to guest state that may
         not be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (vex_control.iropt_register_updates) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify that only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed
                  by preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi ))
                  env->inuse[j] = False;
            }
            break;
         default:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // Neither can any other rubbish value.
            vassert(0);
      }
   } /* if (memRW) */

}

/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add
               it to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment, since
         the appearance of a Get means that the next event for that
         slice of the guest state is no longer a write, but a read.
         Also deals with implicit reads of guest state needed to
         maintain precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
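
/* Before/after sketch (illustrative offsets, no side exits between
   the statements).  Scanning backwards through

      PUT(60) = t1
      t2 = GET:I32(0)
      PUT(60) = t3

   the final Put enters the env as "offsets 60..63 will be written
   again".  The Get of 0..3 does not disturb that entry, so when the
   scan reaches 'PUT(60) = t1' the lookup succeeds and the statement
   is replaced by a no-op: its value can never be observed. */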

/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs
   invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value?  The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do.  The reason is that we do not
   keep track of changes in the guest state and memory.  Thus, two
   Gets, GetIs or Loads, even when accessing the same location, will be
   assumed to compute different values.  After all, the accesses may
   happen at different times and the guest state / memory can have
   changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out-of-line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the
            meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                              e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}
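
/* For instance (illustrative bindings): if env[t1] = Add32(t9,t9) and
   env[t2] = Add32(t9,t9), then comparing RdTmp(t1) against RdTmp(t2)
   yields True: the Iex_RdTmp case chases both temps to their
   definitions and compares those structurally.  If instead both were
   bound to GET:I32(0), the result would be False -- identical Gets
   may still read different values at different times. */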

/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);
}

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:  return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
      default: vpanic("isZeroU");
   }
}

/* Is this an integer constant with all bits set (value 1...1b) ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
                                   == 0xFFFFFFFF);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
                                   == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:    return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16:   return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32:   return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64:   return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
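
/* Worked example: fold_Clz32(0x00F00000) returns 8, since bits 31..24
   are clear and bit 23 is the first set bit the loop finds.  The
   vasserts reflect that Iop_Clz32/64 leave the arg==0 case undefined,
   so the fold_Expr cases below simply decline to fold it. */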

/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
   }
   return e;
}

static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   switch (e->tag) {
      case Iex_Unop:
         /* UNARY ops */
         if (e->Iex.Unop.arg->tag == Iex_Const) {
            switch (e->Iex.Unop.op) {
               case Iop_1Uto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0)));
                  break;
               case Iop_1Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;
               case Iop_1Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;

               case Iop_1Sto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFF : 0)));
                  break;
               case Iop_1Sto16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFF : 0)));
                  break;
               case Iop_1Sto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFF : 0));
                  break;
               case Iop_1Sto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFFFFFFFFFFULL : 0));
                  break;

               case Iop_8Sto32: {
                  /* signed */ Int s32
                     = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  s32 <<= 24;
                  s32 >>= 24;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  break;
               }
               case Iop_16Sto32: {
                  /* signed */ Int s32
                     = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  s32 <<= 16;
                  s32 >>= 16;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
                  break;
               }
               case Iop_8Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_8Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_8Sto16: {
                  /* signed */ Short s16
                     = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  s16 <<= 8;
                  s16 >>= 8;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
                  break;
               }
               case Iop_8Uto16:
                  e2 = IRExpr_Const(IRConst_U16(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_32to16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_64to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                       )));
                  break;

               case Iop_NotV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
                  break;
               case Iop_Not64:
                  e2 = IRExpr_Const(IRConst_U64(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_Not32:
                  e2 = IRExpr_Const(IRConst_U32(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Not16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Not8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
                  break;

               case Iop_Not1:
                  e2 = IRExpr_Const(IRConst_U1(
                          notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
                  break;

               case Iop_64to8: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFULL;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
                  break;
               }
               case Iop_64to16: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFFFULL;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
                  break;
               }
               case Iop_64to32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0x00000000FFFFFFFFULL;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_64HIto32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_32Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
                  break;
               case Iop_16Sto64: {
                  /* signed */ Long s64
                     = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  s64 <<= 48;
                  s64 >>= 48;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  break;
               }
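
               /* The sign-extension cases above and below use the
                  shift-left / shift-right-again idiom: e.g. for
                  Iop_8Sto32 and arg 0x80, (0x80 << 24) >> 24 gives
                  0xFFFFFF80.  This relies on '>>' of a negative
                  signed value behaving as an arithmetic shift, which
                  is implementation-defined in C but holds for the
                  compilers VEX is built with. */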
               case Iop_32Sto64: {
                  /* signed */ Long s64
                     = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  s64 <<= 32;
                  s64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  break;
               }

               case Iop_16to8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }
               case Iop_16HIto8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 >>= 8;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }

               case Iop_CmpNEZ8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                       )));
                  break;
               case Iop_CmpNEZ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_CmpNEZ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                       )));
                  break;

               case Iop_CmpwNEZ32: {
                  UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (w32 == 0)
                     e2 = IRExpr_Const(IRConst_U32( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
                  break;
               }
               case Iop_CmpwNEZ64: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (w64 == 0)
                     e2 = IRExpr_Const(IRConst_U64( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
                  break;
               }

               case Iop_Left32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  Int  s32 = (Int)(u32 & 0xFFFFFFFF);
                  s32 = (s32 | (-s32));
                  e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
                  break;
               }

               case Iop_Left64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  Long  s64 = (Long)u64;
                  s64 = (s64 | (-s64));
                  e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
                  break;
               }

               case Iop_Clz32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (u32 != 0)
                     e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
                  break;
               }
               case Iop_Clz64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (u64 != 0ULL)
                     e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
                  break;
               }

               /* For these vector ones, can't fold all cases, but at
                  least do the most obvious one.  Could do better here
                  using summarise/desummarise of vector constants, but
                  too difficult to verify; hence just handle the zero
                  cases. */
               case Iop_32UtoV128: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (0 == u32) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128to64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 0) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128HIto64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 8) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_64UtoV128: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (0 == u64) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               /* Even stupider (although still correct ..) */
               case Iop_V256to64_0: case Iop_V256to64_1:
               case Iop_V256to64_2: case Iop_V256to64_3: {
                  UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
                  if (v256 == 0x00000000) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               default:
                  goto unhandled;
            }
         }
         break;

      case Iex_Binop:
         /* BINARY ops */
         if (e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg2->tag == Iex_Const) {
            /* cases where both args are consts */
            switch (e->Iex.Binop.op) {

               /* -- Or -- */
               case Iop_Or8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Or16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Or32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Or64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_OrV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Xor -- */
               case Iop_Xor8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Xor16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Xor32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Xor64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_XorV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- And -- */
               case Iop_And8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_And16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_And32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_And64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_AndV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Add -- */
               case Iop_Add8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Add32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Add64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Sub -- */
               case Iop_Sub8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Sub32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Sub64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Max32U -- */
               case Iop_Max32U: {
                  UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  UInt res  = u32a > u32b ? u32a : u32b;
                  e2 = IRExpr_Const(IRConst_U32(res));
                  break;
               }

               /* -- Mul -- */
               case Iop_Mul32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Mul64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               case Iop_MullS32: {
                  /* very paranoid */
                  UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  Int   s32a = (Int)u32a;
                  Int   s32b = (Int)u32b;
                  Long  s64a = (Long)s32a;
                  Long  s64b = (Long)s32b;
                  Long  sres = s64a * s64b;
                  ULong ures = (ULong)sres;
                  e2 = IRExpr_Const(IRConst_U64(ures));
                  break;
               }

               /* -- Shl -- */
               case Iop_Shl32:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31)
                     e2 = IRExpr_Const(IRConst_U32(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                              << shift)));
                  break;
               case Iop_Shl64:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63)
                     e2 = IRExpr_Const(IRConst_U64(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                              << shift)));
                  break;

               /* -- Sar -- */
               case Iop_Sar32: {
                  /* paranoid ... */
                  /*signed*/ Int s32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     s32 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  }
                  break;
               }
               case Iop_Sar64: {
                  /* paranoid ... */
                  /*signed*/ Long s64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     s64 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  }
                  break;
               }
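
               /* Note the range guards on 'shift' above and below:
                  folding e.g. Shl32(x, 32) is skipped, because
                  shifting a 32-bit value by >= 32 bits is undefined
                  behaviour in C, so the expression is simply left
                  unfolded for the back end to deal with. */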
*/ 1866 /*unsigned*/ UInt u32; 1867 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1868 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32); 1869 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1870 if (shift >= 0 && shift <= 31) { 1871 u32 >>=/*unsigned*/ shift; 1872 e2 = IRExpr_Const(IRConst_U32(u32)); 1873 } 1874 break; 1875 } 1876 case Iop_Shr64: { 1877 /* paranoid ... */ 1878 /*unsigned*/ ULong u64; 1879 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1880 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1881 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1882 if (shift >= 0 && shift <= 63) { 1883 u64 >>=/*unsigned*/ shift; 1884 e2 = IRExpr_Const(IRConst_U64(u64)); 1885 } 1886 break; 1887 } 1888 1889 /* -- CmpEQ -- */ 1890 case Iop_CmpEQ32: 1891 e2 = IRExpr_Const(IRConst_U1(toBool( 1892 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1893 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1894 break; 1895 case Iop_CmpEQ64: 1896 e2 = IRExpr_Const(IRConst_U1(toBool( 1897 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1898 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1899 break; 1900 1901 /* -- CmpNE -- */ 1902 case Iop_CmpNE8: 1903 case Iop_CasCmpNE8: 1904 case Iop_ExpCmpNE8: 1905 e2 = IRExpr_Const(IRConst_U1(toBool( 1906 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8) 1907 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))))); 1908 break; 1909 case Iop_CmpNE32: 1910 case Iop_CasCmpNE32: 1911 case Iop_ExpCmpNE32: 1912 e2 = IRExpr_Const(IRConst_U1(toBool( 1913 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1914 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1915 break; 1916 case Iop_CmpNE64: 1917 case Iop_CasCmpNE64: 1918 case Iop_ExpCmpNE64: 1919 e2 = IRExpr_Const(IRConst_U1(toBool( 1920 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1921 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1922 break; 1923 1924 /* -- CmpLEU -- */ 1925 case Iop_CmpLE32U: 1926 e2 = IRExpr_Const(IRConst_U1(toBool( 1927 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1928 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1929 break; 1930 case Iop_CmpLE64U: 1931 e2 = IRExpr_Const(IRConst_U1(toBool( 1932 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1933 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1934 break; 1935 1936 /* -- CmpLES -- */ 1937 case Iop_CmpLE32S: 1938 e2 = IRExpr_Const(IRConst_U1(toBool( 1939 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1940 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1941 break; 1942 case Iop_CmpLE64S: 1943 e2 = IRExpr_Const(IRConst_U1(toBool( 1944 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1945 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1946 break; 1947 1948 /* -- CmpLTS -- */ 1949 case Iop_CmpLT32S: 1950 e2 = IRExpr_Const(IRConst_U1(toBool( 1951 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1952 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1953 break; 1954 case Iop_CmpLT64S: 1955 e2 = IRExpr_Const(IRConst_U1(toBool( 1956 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1957 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1958 break; 1959 1960 /* -- CmpLTU -- */ 1961 case Iop_CmpLT32U: 1962 e2 = IRExpr_Const(IRConst_U1(toBool( 1963 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1964 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1965 break; 1966 case Iop_CmpLT64U: 1967 e2 = IRExpr_Const(IRConst_U1(toBool( 1968 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1969 < 
(ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1970 break; 1971 1972 /* -- CmpORD -- */ 1973 case Iop_CmpORD32S: { 1974 /* very paranoid */ 1975 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 1976 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1977 Int s32a = (Int)u32a; 1978 Int s32b = (Int)u32b; 1979 Int r = 0x2; /* EQ */ 1980 if (s32a < s32b) { 1981 r = 0x8; /* LT */ 1982 } 1983 else if (s32a > s32b) { 1984 r = 0x4; /* GT */ 1985 } 1986 e2 = IRExpr_Const(IRConst_U32(r)); 1987 break; 1988 } 1989 1990 /* -- nHLto2n -- */ 1991 case Iop_32HLto64: 1992 e2 = IRExpr_Const(IRConst_U64( 1993 (((ULong)(e->Iex.Binop.arg1 1994 ->Iex.Const.con->Ico.U32)) << 32) 1995 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)) 1996 )); 1997 break; 1998 case Iop_64HLto128: 1999 /* We can't fold this, because there is no way to 2000 express the result in IR, but at least pretend to 2001 handle it, so as to stop getting blasted with 2002 no-rule-for-this-primop messages. */ 2003 break; 2004 /* For this vector one, can't fold all cases, but at 2005 least do the most obvious one. Could do better here 2006 using summarise/desummarise of vector constants, but 2007 too difficult to verify; hence just handle the zero 2008 cases. */ 2009 case Iop_64HLtoV128: { 2010 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64; 2011 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2012 if (0 == argHi && 0 == argLo) { 2013 e2 = IRExpr_Const(IRConst_V128(0)); 2014 } else { 2015 goto unhandled; 2016 } 2017 break; 2018 } 2019 /* Same reasoning for the 256-bit version. */ 2020 case Iop_V128HLtoV256: { 2021 IRExpr* argHi = e->Iex.Binop.arg1; 2022 IRExpr* argLo = e->Iex.Binop.arg2; 2023 if (isZeroV128(argHi) && isZeroV128(argLo)) { 2024 e2 = IRExpr_Const(IRConst_V256(0)); 2025 } else { 2026 goto unhandled; 2027 } 2028 break; 2029 } 2030 2031 /* -- V128 stuff -- */ 2032 case Iop_InterleaveLO8x16: { 2033 /* This turns up a lot in Memcheck instrumentation of 2034 Icc generated code. I don't know why.
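Only the all-zeroes case is folded: InterleaveLO8x16(0x0:V128, 0x0:V128) ==> 0x0:V128; any other constant arguments fall through to 'unhandled' below.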
*/ 2035 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128; 2036 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128; 2037 if (0 == arg1 && 0 == arg2) { 2038 e2 = IRExpr_Const(IRConst_V128(0)); 2039 } else { 2040 goto unhandled; 2041 } 2042 break; 2043 } 2044 2045 default: 2046 goto unhandled; 2047 } 2048 2049 } else { 2050 2051 /* other cases (identities, etc) */ 2052 switch (e->Iex.Binop.op) { 2053 2054 case Iop_Shl32: 2055 case Iop_Shl64: 2056 case Iop_Shr64: 2057 /* Shl32/Shl64/Shr64(x,0) ==> x */ 2058 if (isZeroU(e->Iex.Binop.arg2)) { 2059 e2 = e->Iex.Binop.arg1; 2060 break; 2061 } 2062 /* Shl32/Shl64/Shr64(0,x) ==> 0 */ 2063 if (isZeroU(e->Iex.Binop.arg1)) { 2064 e2 = e->Iex.Binop.arg1; 2065 break; 2066 } 2067 break; 2068 2069 case Iop_Shr32: 2070 /* Shr32(x,0) ==> x */ 2071 if (isZeroU(e->Iex.Binop.arg2)) { 2072 e2 = e->Iex.Binop.arg1; 2073 break; 2074 } 2075 break; 2076 2077 case Iop_Or8: 2078 case Iop_Or16: 2079 case Iop_Or32: 2080 case Iop_Or64: 2081 case Iop_Max32U: 2082 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */ 2083 if (isZeroU(e->Iex.Binop.arg2)) { 2084 e2 = e->Iex.Binop.arg1; 2085 break; 2086 } 2087 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */ 2088 if (isZeroU(e->Iex.Binop.arg1)) { 2089 e2 = e->Iex.Binop.arg2; 2090 break; 2091 } 2092 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */ 2093 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */ 2094 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) { 2095 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2096 break; 2097 } 2098 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */ 2099 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2100 e2 = e->Iex.Binop.arg1; 2101 break; 2102 } 2103 break; 2104 2105 case Iop_Add8: 2106 /* Add8(t,t) ==> t << 1. 2107 Memcheck doesn't understand that 2108 x+x produces a defined least significant bit, and it seems 2109 simplest just to get rid of the problem by rewriting it 2110 out, since the opportunity to do so exists. */ 2111 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2112 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1, 2113 IRExpr_Const(IRConst_U8(1))); 2114 break; 2115 } 2116 break; 2117 2118 /* NB no Add16(t,t) case yet as no known test case exists */ 2119 2120 case Iop_Add32: 2121 case Iop_Add64: 2122 /* Add32/Add64(x,0) ==> x */ 2123 if (isZeroU(e->Iex.Binop.arg2)) { 2124 e2 = e->Iex.Binop.arg1; 2125 break; 2126 } 2127 /* Add32/Add64(0,x) ==> x */ 2128 if (isZeroU(e->Iex.Binop.arg1)) { 2129 e2 = e->Iex.Binop.arg2; 2130 break; 2131 } 2132 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */ 2133 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2134 e2 = IRExpr_Binop( 2135 e->Iex.Binop.op == Iop_Add32 ? 
Iop_Shl32 : Iop_Shl64, 2136 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1))); 2137 break; 2138 } 2139 break; 2140 2141 case Iop_Sub32: 2142 case Iop_Sub64: 2143 /* Sub32/Sub64(x,0) ==> x */ 2144 if (isZeroU(e->Iex.Binop.arg2)) { 2145 e2 = e->Iex.Binop.arg1; 2146 break; 2147 } 2148 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */ 2149 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2150 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2151 break; 2152 } 2153 break; 2154 case Iop_Sub8x16: 2155 /* Sub8x16(x,0) ==> x */ 2156 if (isZeroV128(e->Iex.Binop.arg2)) { 2157 e2 = e->Iex.Binop.arg1; 2158 break; 2159 } 2160 break; 2161 2162 case Iop_And64: 2163 case Iop_And32: 2164 /* And32/And64(x,1---1b) ==> x */ 2165 if (isOnesU(e->Iex.Binop.arg2)) { 2166 e2 = e->Iex.Binop.arg1; 2167 break; 2168 } 2169 /* And32/And64(x,0) ==> 0 */ 2170 if (isZeroU(e->Iex.Binop.arg2)) { 2171 e2 = e->Iex.Binop.arg2; 2172 break; 2173 } 2174 /* And32/And64(0,x) ==> 0 */ 2175 if (isZeroU(e->Iex.Binop.arg1)) { 2176 e2 = e->Iex.Binop.arg1; 2177 break; 2178 } 2179 /* And32/And64(t,t) ==> t, for some IRTemp t */ 2180 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2181 e2 = e->Iex.Binop.arg1; 2182 break; 2183 } 2184 break; 2185 2186 case Iop_And8: 2187 case Iop_And16: 2188 case Iop_AndV128: 2189 case Iop_AndV256: 2190 /* And8/And16/AndV128/AndV256(t,t) 2191 ==> t, for some IRTemp t */ 2192 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2193 e2 = e->Iex.Binop.arg1; 2194 break; 2195 } 2196 /* Deal with either arg zero. Could handle other And 2197 cases here too. (And32/And64 are handled in the case above, so only the narrow and vector forms can occur here.) */ 2198 if (e->Iex.Binop.op == Iop_AndV256 2199 && (isZeroV256(e->Iex.Binop.arg1) 2200 || isZeroV256(e->Iex.Binop.arg2))) { 2201 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2202 break; 2203 } else if (e->Iex.Binop.op == Iop_AndV128 2204 && (isZeroV128(e->Iex.Binop.arg1) 2205 || isZeroV128(e->Iex.Binop.arg2))) { 2206 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2207 break; 2208 } 2209 break; 2210 2211 case Iop_OrV128: 2212 case Iop_OrV256: 2213 /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */ 2214 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2215 e2 = e->Iex.Binop.arg1; 2216 break; 2217 } 2218 /* OrV128(t,0) ==> t */ 2219 if (e->Iex.Binop.op == Iop_OrV128) { 2220 if (isZeroV128(e->Iex.Binop.arg2)) { 2221 e2 = e->Iex.Binop.arg1; 2222 break; 2223 } 2224 if (isZeroV128(e->Iex.Binop.arg1)) { 2225 e2 = e->Iex.Binop.arg2; 2226 break; 2227 } 2228 } 2229 /* OrV256(t,0) ==> t */ 2230 if (e->Iex.Binop.op == Iop_OrV256) { 2231 if (isZeroV256(e->Iex.Binop.arg2)) { 2232 e2 = e->Iex.Binop.arg1; 2233 break; 2234 } 2235 //Disabled because there's no known test case right now.
2236 //if (isZeroV256(e->Iex.Binop.arg1)) { 2237 // e2 = e->Iex.Binop.arg2; 2238 // break; 2239 //} 2240 } 2241 break; 2242 2243 case Iop_Xor8: 2244 case Iop_Xor16: 2245 case Iop_Xor32: 2246 case Iop_Xor64: 2247 case Iop_XorV128: 2248 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */ 2249 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2250 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2251 break; 2252 } 2253 break; 2254 2255 case Iop_CmpNE32: 2256 /* CmpNE32(t,t) ==> 0, for some IRTemp t */ 2257 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2258 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2259 break; 2260 } 2261 /* CmpNE32(1Uto32(b), 0) ==> b */ 2262 if (isZeroU32(e->Iex.Binop.arg2)) { 2263 IRExpr* a1 = chase(env, e->Iex.Binop.arg1); 2264 if (a1 && a1->tag == Iex_Unop 2265 && a1->Iex.Unop.op == Iop_1Uto32) { 2266 e2 = a1->Iex.Unop.arg; 2267 break; 2268 } 2269 } 2270 break; 2271 2272 case Iop_CmpEQ32: 2273 case Iop_CmpEQ64: 2274 case Iop_CmpEQ8x8: 2275 case Iop_CmpEQ8x16: 2276 case Iop_CmpEQ16x8: 2277 case Iop_CmpEQ32x4: 2278 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2279 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2280 break; 2281 } 2282 break; 2283 2284 default: 2285 break; 2286 } 2287 } 2288 break; 2289 2290 case Iex_ITE: 2291 /* ITE */ 2292 /* is the discriminant a constant? */ 2293 if (e->Iex.ITE.cond->tag == Iex_Const) { 2294 /* assured by the IR type rules */ 2295 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1); 2296 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1 2297 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse; 2298 } 2299 else 2300 /* are the arms identical? (pretty weedy test) */ 2301 if (sameIRExprs(env, e->Iex.ITE.iftrue, 2302 e->Iex.ITE.iffalse)) { 2303 e2 = e->Iex.ITE.iffalse; 2304 } 2305 break; 2306 2307 default: 2308 /* not considered */ 2309 break; 2310 } 2311 2312 /* Show cases where we've found but not folded 'op(t,t)'. Be 2313 careful not to call sameIRExprs with values of different types, 2314 though, else it will assert (and so it should!). We can't 2315 conveniently call typeOfIRExpr on the two args without a whole 2316 bunch of extra plumbing to pass in a type env, so just use a 2317 hacky test to check the arguments are not anything that might 2318 cause sameIRExprs to assert. This is only OK because this kludge is 2319 only used for debug printing, not for "real" operation. For 2320 "real" operation (ie, all other calls to sameIRExprs), it is 2321 essential that the two args have the same type. 2322 2323 The "right" solution is to plumb the containing block's 2324 IRTypeEnv through to here and use typeOfIRExpr to be sure. But 2325 that's a bunch of extra parameter passing which will just slow 2326 down the normal case, for no purpose. */ 2327 if (vex_control.iropt_verbosity > 0 2328 && e == e2 2329 && e->tag == Iex_Binop 2330 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1, 2331 e->Iex.Binop.arg2) 2332 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2333 vex_printf("vex iropt: fold_Expr: no ident rule for: "); 2334 ppIRExpr(e); 2335 vex_printf("\n"); 2336 } 2337 2338 /* Show the overall results of folding.
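Each successful rewrite is reported as 'FOLD: <original> -> <folded>', but only when DEBUG_IROPT is set.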
*/ 2339 if (DEBUG_IROPT && e2 != e) { 2340 vex_printf("FOLD: "); 2341 ppIRExpr(e); vex_printf(" -> "); 2342 ppIRExpr(e2); vex_printf("\n"); 2343 } 2344 2345 return e2; 2346 2347 unhandled: 2348 # if 0 2349 vex_printf("\n\n"); 2350 ppIRExpr(e); 2351 vpanic("fold_Expr: no rule for the above"); 2352 # else 2353 if (vex_control.iropt_verbosity > 0) { 2354 vex_printf("vex iropt: fold_Expr: no const rule for: "); 2355 ppIRExpr(e); 2356 vex_printf("\n"); 2357 } 2358 return e2; 2359 # endif 2360 } 2361 2362 2363 /* Apply the subst to a simple 1-level expression -- guaranteed to be 2364 1-level due to previous flattening pass. */ 2365 2366 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) 2367 { 2368 switch (ex->tag) { 2369 case Iex_RdTmp: 2370 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { 2371 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; 2372 if (rhs->tag == Iex_RdTmp) 2373 return rhs; 2374 if (rhs->tag == Iex_Const 2375 && rhs->Iex.Const.con->tag != Ico_F64i) 2376 return rhs; 2377 } 2378 /* not bound in env */ 2379 return ex; 2380 2381 case Iex_Const: 2382 case Iex_Get: 2383 return ex; 2384 2385 case Iex_GetI: 2386 vassert(isIRAtom(ex->Iex.GetI.ix)); 2387 return IRExpr_GetI( 2388 ex->Iex.GetI.descr, 2389 subst_Expr(env, ex->Iex.GetI.ix), 2390 ex->Iex.GetI.bias 2391 ); 2392 2393 case Iex_Qop: { 2394 IRQop* qop = ex->Iex.Qop.details; 2395 vassert(isIRAtom(qop->arg1)); 2396 vassert(isIRAtom(qop->arg2)); 2397 vassert(isIRAtom(qop->arg3)); 2398 vassert(isIRAtom(qop->arg4)); 2399 return IRExpr_Qop( 2400 qop->op, 2401 subst_Expr(env, qop->arg1), 2402 subst_Expr(env, qop->arg2), 2403 subst_Expr(env, qop->arg3), 2404 subst_Expr(env, qop->arg4) 2405 ); 2406 } 2407 2408 case Iex_Triop: { 2409 IRTriop* triop = ex->Iex.Triop.details; 2410 vassert(isIRAtom(triop->arg1)); 2411 vassert(isIRAtom(triop->arg2)); 2412 vassert(isIRAtom(triop->arg3)); 2413 return IRExpr_Triop( 2414 triop->op, 2415 subst_Expr(env, triop->arg1), 2416 subst_Expr(env, triop->arg2), 2417 subst_Expr(env, triop->arg3) 2418 ); 2419 } 2420 2421 case Iex_Binop: 2422 vassert(isIRAtom(ex->Iex.Binop.arg1)); 2423 vassert(isIRAtom(ex->Iex.Binop.arg2)); 2424 return IRExpr_Binop( 2425 ex->Iex.Binop.op, 2426 subst_Expr(env, ex->Iex.Binop.arg1), 2427 subst_Expr(env, ex->Iex.Binop.arg2) 2428 ); 2429 2430 case Iex_Unop: 2431 vassert(isIRAtom(ex->Iex.Unop.arg)); 2432 return IRExpr_Unop( 2433 ex->Iex.Unop.op, 2434 subst_Expr(env, ex->Iex.Unop.arg) 2435 ); 2436 2437 case Iex_Load: 2438 vassert(isIRAtom(ex->Iex.Load.addr)); 2439 return IRExpr_Load( 2440 ex->Iex.Load.end, 2441 ex->Iex.Load.ty, 2442 subst_Expr(env, ex->Iex.Load.addr) 2443 ); 2444 2445 case Iex_CCall: { 2446 Int i; 2447 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args); 2448 for (i = 0; args2[i]; i++) { 2449 vassert(isIRAtom(args2[i])); 2450 args2[i] = subst_Expr(env, args2[i]); 2451 } 2452 return IRExpr_CCall( 2453 ex->Iex.CCall.cee, 2454 ex->Iex.CCall.retty, 2455 args2 2456 ); 2457 } 2458 2459 case Iex_ITE: 2460 vassert(isIRAtom(ex->Iex.ITE.cond)); 2461 vassert(isIRAtom(ex->Iex.ITE.iftrue)); 2462 vassert(isIRAtom(ex->Iex.ITE.iffalse)); 2463 return IRExpr_ITE( 2464 subst_Expr(env, ex->Iex.ITE.cond), 2465 subst_Expr(env, ex->Iex.ITE.iftrue), 2466 subst_Expr(env, ex->Iex.ITE.iffalse) 2467 ); 2468 2469 default: 2470 vex_printf("\n\n"); ppIRExpr(ex); 2471 vpanic("subst_Expr"); 2472 2473 } 2474 } 2475 2476 2477 /* Apply the subst to stmt, then fold the result as much as possible. 2478 Much simplified due to stmt being previously flattened. 
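For example (temp numbers and constants here are illustrative): with env = { t3 |-> 0x4:I32 }, the flat statement 't5 = Add32(t3,0x8:I32)' substitutes to 't5 = Add32(0x4:I32,0x8:I32)' and then folds to 't5 = 0xC:I32'.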
As a 2479 result of this, the stmt may wind up being turned into a no-op. 2480 */ 2481 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) 2482 { 2483 # if 0 2484 vex_printf("\nsubst and fold stmt\n"); 2485 ppIRStmt(st); 2486 vex_printf("\n"); 2487 # endif 2488 2489 switch (st->tag) { 2490 case Ist_AbiHint: 2491 vassert(isIRAtom(st->Ist.AbiHint.base)); 2492 vassert(isIRAtom(st->Ist.AbiHint.nia)); 2493 return IRStmt_AbiHint( 2494 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)), 2495 st->Ist.AbiHint.len, 2496 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia)) 2497 ); 2498 case Ist_Put: 2499 vassert(isIRAtom(st->Ist.Put.data)); 2500 return IRStmt_Put( 2501 st->Ist.Put.offset, 2502 fold_Expr(env, subst_Expr(env, st->Ist.Put.data)) 2503 ); 2504 2505 case Ist_PutI: { 2506 IRPutI *puti, *puti2; 2507 puti = st->Ist.PutI.details; 2508 vassert(isIRAtom(puti->ix)); 2509 vassert(isIRAtom(puti->data)); 2510 puti2 = mkIRPutI(puti->descr, 2511 fold_Expr(env, subst_Expr(env, puti->ix)), 2512 puti->bias, 2513 fold_Expr(env, subst_Expr(env, puti->data))); 2514 return IRStmt_PutI(puti2); 2515 } 2516 2517 case Ist_WrTmp: 2518 /* This is the one place where an expr (st->Ist.WrTmp.data) is 2519 allowed to be more than just a constant or a tmp. */ 2520 return IRStmt_WrTmp( 2521 st->Ist.WrTmp.tmp, 2522 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data)) 2523 ); 2524 2525 case Ist_Store: 2526 vassert(isIRAtom(st->Ist.Store.addr)); 2527 vassert(isIRAtom(st->Ist.Store.data)); 2528 return IRStmt_Store( 2529 st->Ist.Store.end, 2530 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)), 2531 fold_Expr(env, subst_Expr(env, st->Ist.Store.data)) 2532 ); 2533 2534 case Ist_StoreG: { 2535 IRStoreG* sg = st->Ist.StoreG.details; 2536 vassert(isIRAtom(sg->addr)); 2537 vassert(isIRAtom(sg->data)); 2538 vassert(isIRAtom(sg->guard)); 2539 IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr)); 2540 IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data)); 2541 IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard)); 2542 if (fguard->tag == Iex_Const) { 2543 /* The condition on this store has folded down to a constant. */ 2544 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2545 if (fguard->Iex.Const.con->Ico.U1 == False) { 2546 return IRStmt_NoOp(); 2547 } else { 2548 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2549 return IRStmt_Store(sg->end, faddr, fdata); 2550 } 2551 } 2552 return IRStmt_StoreG(sg->end, faddr, fdata, fguard); 2553 } 2554 2555 case Ist_LoadG: { 2556 /* This is complicated. If the guard folds down to 'false', 2557 we can replace it with an assignment 'dst := alt', but if 2558 the guard folds down to 'true', we can't conveniently 2559 replace it with an unconditional load, because doing so 2560 requires generating a new temporary, and that is not easy 2561 to do at this point. */ 2562 IRLoadG* lg = st->Ist.LoadG.details; 2563 vassert(isIRAtom(lg->addr)); 2564 vassert(isIRAtom(lg->alt)); 2565 vassert(isIRAtom(lg->guard)); 2566 IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr)); 2567 IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt)); 2568 IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard)); 2569 if (fguard->tag == Iex_Const) { 2570 /* The condition on this load has folded down to a constant. */ 2571 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2572 if (fguard->Iex.Const.con->Ico.U1 == False) { 2573 /* The load is not going to happen -- instead 'alt' is 2574 assigned to 'dst'. 
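For example (illustrative notation): 't3 = if (0:I1) <convert>(<load>) else t7' becomes plain 't3 = t7'.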
*/ 2575 return IRStmt_WrTmp(lg->dst, falt); 2576 } else { 2577 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2578 /* The load is always going to happen. We want to 2579 convert to an unconditional load and assign to 'dst' 2580 (IRStmt_WrTmp). Problem is we need an extra temp to 2581 hold the loaded value, but none is available. 2582 Instead, reconstitute the conditional load (with 2583 folded args, of course) and let the caller of this 2584 routine deal with the problem. */ 2585 } 2586 } 2587 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard); 2588 } 2589 2590 case Ist_CAS: { 2591 IRCAS *cas, *cas2; 2592 cas = st->Ist.CAS.details; 2593 vassert(isIRAtom(cas->addr)); 2594 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 2595 vassert(isIRAtom(cas->expdLo)); 2596 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 2597 vassert(isIRAtom(cas->dataLo)); 2598 cas2 = mkIRCAS( 2599 cas->oldHi, cas->oldLo, cas->end, 2600 fold_Expr(env, subst_Expr(env, cas->addr)), 2601 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) 2602 : NULL, 2603 fold_Expr(env, subst_Expr(env, cas->expdLo)), 2604 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) 2605 : NULL, 2606 fold_Expr(env, subst_Expr(env, cas->dataLo)) 2607 ); 2608 return IRStmt_CAS(cas2); 2609 } 2610 2611 case Ist_LLSC: 2612 vassert(isIRAtom(st->Ist.LLSC.addr)); 2613 if (st->Ist.LLSC.storedata) 2614 vassert(isIRAtom(st->Ist.LLSC.storedata)); 2615 return IRStmt_LLSC( 2616 st->Ist.LLSC.end, 2617 st->Ist.LLSC.result, 2618 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)), 2619 st->Ist.LLSC.storedata 2620 ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata)) 2621 : NULL 2622 ); 2623 2624 case Ist_Dirty: { 2625 Int i; 2626 IRDirty *d, *d2; 2627 d = st->Ist.Dirty.details; 2628 d2 = emptyIRDirty(); 2629 *d2 = *d; 2630 d2->args = shallowCopyIRExprVec(d2->args); 2631 if (d2->mFx != Ifx_None) { 2632 vassert(isIRAtom(d2->mAddr)); 2633 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr)); 2634 } 2635 vassert(isIRAtom(d2->guard)); 2636 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard)); 2637 for (i = 0; d2->args[i]; i++) { 2638 IRExpr* arg = d2->args[i]; 2639 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) { 2640 vassert(isIRAtom(arg)); 2641 d2->args[i] = fold_Expr(env, subst_Expr(env, arg)); 2642 } 2643 } 2644 return IRStmt_Dirty(d2); 2645 } 2646 2647 case Ist_IMark: 2648 return IRStmt_IMark(st->Ist.IMark.addr, 2649 st->Ist.IMark.len, 2650 st->Ist.IMark.delta); 2651 2652 case Ist_NoOp: 2653 return IRStmt_NoOp(); 2654 2655 case Ist_MBE: 2656 return IRStmt_MBE(st->Ist.MBE.event); 2657 2658 case Ist_Exit: { 2659 IRExpr* fcond; 2660 vassert(isIRAtom(st->Ist.Exit.guard)); 2661 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard)); 2662 if (fcond->tag == Iex_Const) { 2663 /* Interesting. The condition on this exit has folded down to 2664 a constant. */ 2665 vassert(fcond->Iex.Const.con->tag == Ico_U1); 2666 if (fcond->Iex.Const.con->Ico.U1 == False) { 2667 /* exit is never going to happen, so dump the statement. */ 2668 return IRStmt_NoOp(); 2669 } else { 2670 vassert(fcond->Iex.Const.con->Ico.U1 == True); 2671 /* Hmmm. The exit has become unconditional. Leave it 2672 as it is for now, since we'd have to truncate the BB 2673 at this point, which is tricky. Such truncation is 2674 done later by the dead-code elimination pass. */ 2675 /* fall out into the reconstruct-the-exit code. 
*/ 2676 if (vex_control.iropt_verbosity > 0) 2677 /* really a misuse of vex_control.iropt_verbosity */ 2678 vex_printf("vex iropt: IRStmt_Exit became unconditional\n"); 2679 } 2680 } 2681 return IRStmt_Exit(fcond, st->Ist.Exit.jk, 2682 st->Ist.Exit.dst, st->Ist.Exit.offsIP); 2683 } 2684 2685 default: 2686 vex_printf("\n"); ppIRStmt(st); 2687 vpanic("subst_and_fold_Stmt"); 2688 } 2689 } 2690 2691 2692 IRSB* cprop_BB ( IRSB* in ) 2693 { 2694 Int i; 2695 IRSB* out; 2696 IRStmt* st2; 2697 Int n_tmps = in->tyenv->types_used; 2698 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*)); 2699 /* Keep track of IRStmt_LoadGs that we need to revisit after 2700 processing all the other statements. */ 2701 const Int N_FIXUPS = 16; 2702 Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */ 2703 Int n_fixups = 0; 2704 2705 out = emptyIRSB(); 2706 out->tyenv = deepCopyIRTypeEnv( in->tyenv ); 2707 2708 /* Set up the env which travels forward. This holds a 2709 substitution, mapping IRTemps to IRExprs. The environment 2710 is to be applied as we move along. Keys are IRTemps. 2711 Values are IRExpr*s. 2712 */ 2713 for (i = 0; i < n_tmps; i++) 2714 env[i] = NULL; 2715 2716 /* For each original SSA-form stmt ... */ 2717 for (i = 0; i < in->stmts_used; i++) { 2718 2719 /* First apply the substitution to the current stmt. This 2720 propagates in any constants and tmp-tmp assignments 2721 accumulated prior to this point. As part of the subst_and_fold_Stmt 2722 call, also fold any resulting constant expressions. */ 2723 2724 st2 = in->stmts[i]; 2725 2726 /* perhaps st2 is already a no-op? */ 2727 if (st2->tag == Ist_NoOp) continue; 2728 2729 st2 = subst_and_fold_Stmt( env, st2 ); 2730 2731 /* Deal with some post-folding special cases. */ 2732 switch (st2->tag) { 2733 2734 /* If the statement has been folded into a no-op, forget 2735 it. */ 2736 case Ist_NoOp: 2737 continue; 2738 2739 /* If the statement assigns to an IRTemp, add it to the 2740 running environment. This is for the benefit of copy 2741 propagation and to allow sameIRExprs to look through 2742 IRTemps. */ 2743 case Ist_WrTmp: { 2744 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL); 2745 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; 2746 2747 /* 't1 = t2' -- don't add to BB; will be optimized out */ 2748 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) 2749 continue; 2750 2751 /* 't = const' && 'const != F64i' -- don't add to BB 2752 Note, we choose not to propagate const when const is an 2753 F64i, so that F64i literals can be CSE'd later. This 2754 helps x86 floating point code generation. */ 2755 if (st2->Ist.WrTmp.data->tag == Iex_Const 2756 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) { 2757 continue; 2758 } 2759 /* else add it to the output, as normal */ 2760 break; 2761 } 2762 2763 case Ist_LoadG: { 2764 IRLoadG* lg = st2->Ist.LoadG.details; 2765 IRExpr* guard = lg->guard; 2766 if (guard->tag == Iex_Const) { 2767 /* The guard has folded to a constant, and that 2768 constant must be 1:I1, since subst_and_fold_Stmt 2769 folds out the case 0:I1 by itself. */ 2770 vassert(guard->Iex.Const.con->tag == Ico_U1); 2771 vassert(guard->Iex.Const.con->Ico.U1 == True); 2772 /* Add a NoOp here as a placeholder, and make a note of 2773 where it is in the output block. Afterwards we'll 2774 come back here and transform the NoOp and the LoadG 2775 into a load-convert pair. The fixups[] entry 2776 refers to the inserted NoOp, and we expect to find 2777 the relevant LoadG immediately after it.
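For example (illustrative, assuming an ILGop_8Uto32 conversion): the placeholder/LoadG pair { IR-NoOp ; t9 = <LoadG, guard folded to 1:I1> } is rewritten by the fixup pass at the end of cprop_BB as { tLoaded = LDle:I8(addr) ; t9 = 8Uto32(tLoaded) }.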
*/ 2778 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); 2779 if (n_fixups < N_FIXUPS) { 2780 fixups[n_fixups++] = out->stmts_used; 2781 addStmtToIRSB( out, IRStmt_NoOp() ); 2782 } 2783 } 2784 /* And always add the LoadG to the output, regardless. */ 2785 break; 2786 } 2787 2788 default: 2789 break; 2790 } 2791 2792 /* Not interesting, copy st2 into the output block. */ 2793 addStmtToIRSB( out, st2 ); 2794 } 2795 2796 # if STATS_IROPT 2797 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n", 2798 invocation_count, recursion_count, success_count, 2799 recursion_success_count, max_nodes_visited); 2800 # endif 2801 2802 out->next = subst_Expr( env, in->next ); 2803 out->jumpkind = in->jumpkind; 2804 out->offsIP = in->offsIP; 2805 2806 /* Process any leftover unconditional LoadGs that we noticed 2807 in the main pass. */ 2808 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); 2809 for (i = 0; i < n_fixups; i++) { 2810 Int ix = fixups[i]; 2811 /* Carefully verify that the LoadG has the expected form. */ 2812 vassert(ix >= 0 && ix+1 < out->stmts_used); 2813 IRStmt* nop = out->stmts[ix]; 2814 IRStmt* lgu = out->stmts[ix+1]; 2815 vassert(nop->tag == Ist_NoOp); 2816 vassert(lgu->tag == Ist_LoadG); 2817 IRLoadG* lg = lgu->Ist.LoadG.details; 2818 IRExpr* guard = lg->guard; 2819 vassert(guard->Iex.Const.con->tag == Ico_U1); 2820 vassert(guard->Iex.Const.con->Ico.U1 == True); 2821 /* Figure out the load and result types, and the implied 2822 conversion operation. */ 2823 IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID; 2824 typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg); 2825 IROp cvtOp = Iop_INVALID; 2826 switch (lg->cvt) { 2827 case ILGop_Ident32: break; 2828 case ILGop_8Uto32: cvtOp = Iop_8Uto32; break; 2829 case ILGop_8Sto32: cvtOp = Iop_8Sto32; break; 2830 case ILGop_16Uto32: cvtOp = Iop_16Uto32; break; 2831 case ILGop_16Sto32: cvtOp = Iop_16Sto32; break; 2832 default: vpanic("cprop_BB: unhandled ILGOp"); 2833 } 2834 /* Replace the placeholder NoOp by the required unconditional 2835 load. */ 2836 IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg); 2837 out->stmts[ix] 2838 = IRStmt_WrTmp(tLoaded, 2839 IRExpr_Load(lg->end, cvtArg, lg->addr)); 2840 /* Replace the LoadG by a conversion from the loaded value's 2841 type to the required result type. */ 2842 out->stmts[ix+1] 2843 = IRStmt_WrTmp( 2844 lg->dst, cvtOp == Iop_INVALID 2845 ? IRExpr_RdTmp(tLoaded) 2846 : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded))); 2847 } 2848 2849 return out; 2850 } 2851 2852 2853 /*---------------------------------------------------------------*/ 2854 /*--- Dead code (t = E) removal ---*/ 2855 /*---------------------------------------------------------------*/ 2856 2857 /* As a side effect, also removes all code following an unconditional 2858 side exit. */ 2859 2860 /* The 'set' used below is conceptually a map from IRTemp to nothing 2861 -- really just representing a set of IRTemps.
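In the implementation it is simply a Bool array indexed by IRTemp (temp numbers are dense, running from 0 to tyenv->types_used - 1); set[t] == True means t is used later in the block.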
2862 */ 2863 2864 inline 2865 static void addUses_Temp ( Bool* set, IRTemp tmp ) 2866 { 2867 set[(Int)tmp] = True; 2868 } 2869 2870 static void addUses_Expr ( Bool* set, IRExpr* e ) 2871 { 2872 Int i; 2873 switch (e->tag) { 2874 case Iex_GetI: 2875 addUses_Expr(set, e->Iex.GetI.ix); 2876 return; 2877 case Iex_ITE: 2878 addUses_Expr(set, e->Iex.ITE.cond); 2879 addUses_Expr(set, e->Iex.ITE.iftrue); 2880 addUses_Expr(set, e->Iex.ITE.iffalse); 2881 return; 2882 case Iex_CCall: 2883 for (i = 0; e->Iex.CCall.args[i]; i++) 2884 addUses_Expr(set, e->Iex.CCall.args[i]); 2885 return; 2886 case Iex_Load: 2887 addUses_Expr(set, e->Iex.Load.addr); 2888 return; 2889 case Iex_Qop: 2890 addUses_Expr(set, e->Iex.Qop.details->arg1); 2891 addUses_Expr(set, e->Iex.Qop.details->arg2); 2892 addUses_Expr(set, e->Iex.Qop.details->arg3); 2893 addUses_Expr(set, e->Iex.Qop.details->arg4); 2894 return; 2895 case Iex_Triop: 2896 addUses_Expr(set, e->Iex.Triop.details->arg1); 2897 addUses_Expr(set, e->Iex.Triop.details->arg2); 2898 addUses_Expr(set, e->Iex.Triop.details->arg3); 2899 return; 2900 case Iex_Binop: 2901 addUses_Expr(set, e->Iex.Binop.arg1); 2902 addUses_Expr(set, e->Iex.Binop.arg2); 2903 return; 2904 case Iex_Unop: 2905 addUses_Expr(set, e->Iex.Unop.arg); 2906 return; 2907 case Iex_RdTmp: 2908 addUses_Temp(set, e->Iex.RdTmp.tmp); 2909 return; 2910 case Iex_Const: 2911 case Iex_Get: 2912 return; 2913 default: 2914 vex_printf("\n"); 2915 ppIRExpr(e); 2916 vpanic("addUses_Expr"); 2917 } 2918 } 2919 2920 static void addUses_Stmt ( Bool* set, IRStmt* st ) 2921 { 2922 Int i; 2923 IRDirty* d; 2924 IRCAS* cas; 2925 switch (st->tag) { 2926 case Ist_AbiHint: 2927 addUses_Expr(set, st->Ist.AbiHint.base); 2928 addUses_Expr(set, st->Ist.AbiHint.nia); 2929 return; 2930 case Ist_PutI: 2931 addUses_Expr(set, st->Ist.PutI.details->ix); 2932 addUses_Expr(set, st->Ist.PutI.details->data); 2933 return; 2934 case Ist_WrTmp: 2935 addUses_Expr(set, st->Ist.WrTmp.data); 2936 return; 2937 case Ist_Put: 2938 addUses_Expr(set, st->Ist.Put.data); 2939 return; 2940 case Ist_Store: 2941 addUses_Expr(set, st->Ist.Store.addr); 2942 addUses_Expr(set, st->Ist.Store.data); 2943 return; 2944 case Ist_StoreG: { 2945 IRStoreG* sg = st->Ist.StoreG.details; 2946 addUses_Expr(set, sg->addr); 2947 addUses_Expr(set, sg->data); 2948 addUses_Expr(set, sg->guard); 2949 return; 2950 } 2951 case Ist_LoadG: { 2952 IRLoadG* lg = st->Ist.LoadG.details; 2953 addUses_Expr(set, lg->addr); 2954 addUses_Expr(set, lg->alt); 2955 addUses_Expr(set, lg->guard); 2956 return; 2957 } 2958 case Ist_CAS: 2959 cas = st->Ist.CAS.details; 2960 addUses_Expr(set, cas->addr); 2961 if (cas->expdHi) 2962 addUses_Expr(set, cas->expdHi); 2963 addUses_Expr(set, cas->expdLo); 2964 if (cas->dataHi) 2965 addUses_Expr(set, cas->dataHi); 2966 addUses_Expr(set, cas->dataLo); 2967 return; 2968 case Ist_LLSC: 2969 addUses_Expr(set, st->Ist.LLSC.addr); 2970 if (st->Ist.LLSC.storedata) 2971 addUses_Expr(set, st->Ist.LLSC.storedata); 2972 return; 2973 case Ist_Dirty: 2974 d = st->Ist.Dirty.details; 2975 if (d->mFx != Ifx_None) 2976 addUses_Expr(set, d->mAddr); 2977 addUses_Expr(set, d->guard); 2978 for (i = 0; d->args[i] != NULL; i++) { 2979 IRExpr* arg = d->args[i]; 2980 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 2981 addUses_Expr(set, arg); 2982 } 2983 return; 2984 case Ist_NoOp: 2985 case Ist_IMark: 2986 case Ist_MBE: 2987 return; 2988 case Ist_Exit: 2989 addUses_Expr(set, st->Ist.Exit.guard); 2990 return; 2991 default: 2992 vex_printf("\n"); 2993 ppIRStmt(st); 2994 
vpanic("addUses_Stmt"); 2995 } 2996 } 2997 2998 2999 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */ 3000 static Bool isZeroU1 ( IRExpr* e ) 3001 { 3002 return toBool( e->tag == Iex_Const 3003 && e->Iex.Const.con->tag == Ico_U1 3004 && e->Iex.Const.con->Ico.U1 == False ); 3005 } 3006 3007 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */ 3008 static Bool isOneU1 ( IRExpr* e ) 3009 { 3010 return toBool( e->tag == Iex_Const 3011 && e->Iex.Const.con->tag == Ico_U1 3012 && e->Iex.Const.con->Ico.U1 == True ); 3013 } 3014 3015 3016 /* Note, this destructively modifies the given IRSB. */ 3017 3018 /* Scan backwards through statements, carrying a set of IRTemps which 3019 are known to be used after the current point. On encountering 't = 3020 E', delete the binding if it is not used. Otherwise, add any temp 3021 uses to the set and keep on moving backwards. 3022 3023 As an enhancement, the first (backwards) pass searches for IR exits 3024 with always-taken conditions and notes the location of the earliest 3025 one in the block. If any such are found, a second pass copies the 3026 exit destination and jump kind to the bb-end. Then, the exit and 3027 all statements following it are turned into no-ops. 3028 */ 3029 3030 /* notstatic */ void do_deadcode_BB ( IRSB* bb ) 3031 { 3032 Int i, i_unconditional_exit; 3033 Int n_tmps = bb->tyenv->types_used; 3034 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool)); 3035 IRStmt* st; 3036 3037 for (i = 0; i < n_tmps; i++) 3038 set[i] = False; 3039 3040 /* start off by recording IRTemp uses in the next field. */ 3041 addUses_Expr(set, bb->next); 3042 3043 /* First pass */ 3044 3045 /* Work backwards through the stmts */ 3046 i_unconditional_exit = -1; 3047 for (i = bb->stmts_used-1; i >= 0; i--) { 3048 st = bb->stmts[i]; 3049 if (st->tag == Ist_NoOp) 3050 continue; 3051 /* take note of any unconditional exits */ 3052 if (st->tag == Ist_Exit 3053 && isOneU1(st->Ist.Exit.guard)) 3054 i_unconditional_exit = i; 3055 if (st->tag == Ist_WrTmp 3056 && set[(Int)(st->Ist.WrTmp.tmp)] == False) { 3057 /* it's an IRTemp which never got used. Delete it. */ 3058 if (DEBUG_IROPT) { 3059 vex_printf("DEAD: "); 3060 ppIRStmt(st); 3061 vex_printf("\n"); 3062 } 3063 bb->stmts[i] = IRStmt_NoOp(); 3064 } 3065 else 3066 if (st->tag == Ist_Dirty 3067 && st->Ist.Dirty.details->guard 3068 && isZeroU1(st->Ist.Dirty.details->guard)) { 3069 /* This is a dirty helper which will never get called. 3070 Delete it. */ 3071 bb->stmts[i] = IRStmt_NoOp(); 3072 } 3073 else { 3074 /* Note any IRTemp uses made by the current statement. */ 3075 addUses_Stmt(set, st); 3076 } 3077 } 3078 3079 /* Optional second pass: if any unconditional exits were found, 3080 delete them and all following statements. 
*/ 3081 3082 if (i_unconditional_exit != -1) { 3083 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", 3084 i_unconditional_exit); 3085 vassert(i_unconditional_exit >= 0 3086 && i_unconditional_exit < bb->stmts_used); 3087 bb->next 3088 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); 3089 bb->jumpkind 3090 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; 3091 bb->offsIP 3092 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; 3093 for (i = i_unconditional_exit; i < bb->stmts_used; i++) 3094 bb->stmts[i] = IRStmt_NoOp(); 3095 } 3096 } 3097 3098 3099 /*---------------------------------------------------------------*/ 3100 /*--- Specialisation of helper function calls, in ---*/ 3101 /*--- collaboration with the front end ---*/ 3102 /*---------------------------------------------------------------*/ 3103 3104 static 3105 IRSB* spec_helpers_BB( 3106 IRSB* bb, 3107 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int) 3108 ) 3109 { 3110 Int i; 3111 IRStmt* st; 3112 IRExpr* ex; 3113 Bool any = False; 3114 3115 for (i = bb->stmts_used-1; i >= 0; i--) { 3116 st = bb->stmts[i]; 3117 3118 if (st->tag != Ist_WrTmp 3119 || st->Ist.WrTmp.data->tag != Iex_CCall) 3120 continue; 3121 3122 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, 3123 st->Ist.WrTmp.data->Iex.CCall.args, 3124 &bb->stmts[0], i ); 3125 if (!ex) 3126 /* the front end can't think of a suitable replacement */ 3127 continue; 3128 3129 /* We got something better. Install it in the bb. */ 3130 any = True; 3131 bb->stmts[i] 3132 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 3133 3134 if (0) { 3135 vex_printf("SPEC: "); 3136 ppIRExpr(st->Ist.WrTmp.data); 3137 vex_printf(" --> "); 3138 ppIRExpr(ex); 3139 vex_printf("\n"); 3140 } 3141 } 3142 3143 if (any) 3144 bb = flatten_BB(bb); 3145 return bb; 3146 } 3147 3148 3149 /*---------------------------------------------------------------*/ 3150 /*--- Determination of guest state aliasing relationships ---*/ 3151 /*---------------------------------------------------------------*/ 3152 3153 /* These are helper functions for CSE and GetI/PutI transformations. 3154 3155 Determine, to the extent possible, the relationship between two 3156 guest state accesses. The possible outcomes are: 3157 3158 * Exact alias. These two accesses denote precisely the same 3159 piece of the guest state. 3160 3161 * Definitely no alias. These two accesses are guaranteed not to 3162 overlap any part of the guest state. 3163 3164 * Unknown -- if neither of the above can be established. 3165 3166 If in doubt, return Unknown. */ 3167 3168 typedef 3169 enum { ExactAlias, NoAlias, UnknownAlias } 3170 GSAliasing; 3171 3172 3173 /* Produces the alias relation between an indexed guest 3174 state access and a non-indexed access. */ 3175 3176 static 3177 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1, 3178 Int offset2, IRType ty2 ) 3179 { 3180 UInt minoff1, maxoff1, minoff2, maxoff2; 3181 3182 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3183 minoff2 = offset2; 3184 maxoff2 = minoff2 + sizeofIRType(ty2) - 1; 3185 3186 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3187 return NoAlias; 3188 3189 /* Could probably do better here if required. For the moment 3190 however just claim not to know anything more. */ 3191 return UnknownAlias; 3192 } 3193 3194 3195 /* Produces the alias relation between two indexed guest state 3196 accesses. 
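For example (illustrative): with identical descriptors (say nElems == 8) and identical index temps, biases -2 and 6 both normalise to 6 mod 8, giving ExactAlias; biases -2 and 5 normalise to 6 and 5, giving NoAlias.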
*/ 3197 3198 static 3199 GSAliasing getAliasingRelation_II ( 3200 IRRegArray* descr1, IRExpr* ix1, Int bias1, 3201 IRRegArray* descr2, IRExpr* ix2, Int bias2 3202 ) 3203 { 3204 UInt minoff1, maxoff1, minoff2, maxoff2; 3205 Int iters; 3206 3207 /* First try hard to show they don't alias. */ 3208 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3209 getArrayBounds( descr2, &minoff2, &maxoff2 ); 3210 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3211 return NoAlias; 3212 3213 /* So the two arrays at least partially overlap. To get any 3214 further we'll have to be sure that the descriptors are 3215 identical. */ 3216 if (!eqIRRegArray(descr1, descr2)) 3217 return UnknownAlias; 3218 3219 /* The descriptors are identical. Now the only difference can be 3220 in the index expressions. If they cannot be shown to be 3221 identical, we have to say we don't know what the aliasing 3222 relation will be. Now, since the IR is flattened, the index 3223 expressions should be atoms -- either consts or tmps. So that 3224 makes the comparison simple. */ 3225 vassert(isIRAtom(ix1)); 3226 vassert(isIRAtom(ix2)); 3227 if (!eqIRAtom(ix1,ix2)) 3228 return UnknownAlias; 3229 3230 /* Ok, the index expressions are identical. So now the only way 3231 they can be different is in the bias. Normalise this 3232 paranoidly, to reliably establish equality/non-equality. */ 3233 3234 /* So now we know that the GetI and PutI index the same array 3235 with the same base. Are the offsets the same, modulo the 3236 array size? Do this paranoidly. */ 3237 vassert(descr1->nElems == descr2->nElems); 3238 vassert(descr1->elemTy == descr2->elemTy); 3239 vassert(descr1->base == descr2->base); 3240 iters = 0; 3241 while (bias1 < 0 || bias2 < 0) { 3242 bias1 += descr1->nElems; 3243 bias2 += descr1->nElems; 3244 iters++; 3245 if (iters > 10) 3246 vpanic("getAliasingRelation: iters"); 3247 } 3248 vassert(bias1 >= 0 && bias2 >= 0); 3249 bias1 %= descr1->nElems; 3250 bias2 %= descr1->nElems; 3251 vassert(bias1 >= 0 && bias1 < descr1->nElems); 3252 vassert(bias2 >= 0 && bias2 < descr1->nElems); 3253 3254 /* Finally, bias1 and bias2 are normalised into the range 3255 0 .. descr1->nElems - 1. And so we can establish 3256 equality/non-equality. */ 3257 3258 return bias1==bias2 ? ExactAlias : NoAlias; 3259 } 3260 3261 3262 /*---------------------------------------------------------------*/ 3263 /*--- Common Subexpression Elimination ---*/ 3264 /*---------------------------------------------------------------*/ 3265 3266 /* Expensive in time and space. */ 3267 3268 /* Uses two environments: 3269 an IRTemp -> IRTemp mapping 3270 a mapping from AvailExpr* to IRTemp 3271 */ 3272 3273 typedef 3274 struct { 3275 enum { TCc, TCt } tag; 3276 union { IRTemp tmp; IRConst* con; } u; 3277 } 3278 TmpOrConst; 3279 3280 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 ) 3281 { 3282 if (tc1->tag != tc2->tag) 3283 return False; 3284 switch (tc1->tag) { 3285 case TCc: 3286 return eqIRConst(tc1->u.con, tc2->u.con); 3287 case TCt: 3288 return tc1->u.tmp == tc2->u.tmp; 3289 default: 3290 vpanic("eqTmpOrConst"); 3291 } 3292 } 3293 3294 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 ) 3295 { 3296 Bool eq = cee1->addr == cee2->addr; 3297 if (eq) { 3298 vassert(cee1->regparms == cee2->regparms); 3299 vassert(cee1->mcx_mask == cee2->mcx_mask); 3300 /* Names should be the same too, but we don't bother to 3301 check. */ 3302 } 3303 return eq; 3304 } 3305 3306 /* Convert a NULL terminated IRExpr* vector to an array of 3307 TmpOrConsts, and a length.
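For example (illustrative), the vector { t3, 0x10:I32, NULL } becomes a two-element array [ (TCt, t3), (TCc, 0x10:I32) ] with *nOuts set to 2.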
*/ 3308 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs, 3309 /*OUT*/Int* nOuts, 3310 IRExpr** ins ) 3311 { 3312 Int i, n; 3313 /* We have to make two passes, one to count, one to copy. */ 3314 for (n = 0; ins[n]; n++) 3315 ; 3316 *outs = LibVEX_Alloc(n * sizeof(TmpOrConst)); 3317 *nOuts = n; 3318 /* and now copy .. */ 3319 for (i = 0; i < n; i++) { 3320 IRExpr* arg = ins[i]; 3321 TmpOrConst* dst = &(*outs)[i]; 3322 if (arg->tag == Iex_RdTmp) { 3323 dst->tag = TCt; 3324 dst->u.tmp = arg->Iex.RdTmp.tmp; 3325 } 3326 else if (arg->tag == Iex_Const) { 3327 dst->tag = TCc; 3328 dst->u.con = arg->Iex.Const.con; 3329 } 3330 else { 3331 /* Failure of this is serious; it means that the presented arg 3332 isn't an IR atom, as it should be. */ 3333 vpanic("irExprVec_to_TmpOrConsts"); 3334 } 3335 } 3336 } 3337 3338 typedef 3339 struct { 3340 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt, 3341 CCall 3342 } tag; 3343 union { 3344 /* unop(tmp) */ 3345 struct { 3346 IROp op; 3347 IRTemp arg; 3348 } Ut; 3349 /* binop(tmp,tmp) */ 3350 struct { 3351 IROp op; 3352 IRTemp arg1; 3353 IRTemp arg2; 3354 } Btt; 3355 /* binop(tmp,const) */ 3356 struct { 3357 IROp op; 3358 IRTemp arg1; 3359 IRConst con2; 3360 } Btc; 3361 /* binop(const,tmp) */ 3362 struct { 3363 IROp op; 3364 IRConst con1; 3365 IRTemp arg2; 3366 } Bct; 3367 /* F64i-style const */ 3368 struct { 3369 ULong f64i; 3370 } Cf64i; 3371 /* ITE(tmp,tmp,tmp) */ 3372 struct { 3373 IRTemp co; 3374 IRTemp e1; 3375 IRTemp e0; 3376 } Ittt; 3377 /* ITE(tmp,tmp,const) */ 3378 struct { 3379 IRTemp co; 3380 IRTemp e1; 3381 IRConst con0; 3382 } Ittc; 3383 /* ITE(tmp,const,tmp) */ 3384 struct { 3385 IRTemp co; 3386 IRConst con1; 3387 IRTemp e0; 3388 } Itct; 3389 /* ITE(tmp,const,const) */ 3390 struct { 3391 IRTemp co; 3392 IRConst con1; 3393 IRConst con0; 3394 } Itcc; 3395 /* GetI(descr,tmp,bias)*/ 3396 struct { 3397 IRRegArray* descr; 3398 IRTemp ix; 3399 Int bias; 3400 } GetIt; 3401 /* Clean helper call */ 3402 struct { 3403 IRCallee* cee; 3404 TmpOrConst* args; 3405 Int nArgs; 3406 IRType retty; 3407 } CCall; 3408 } u; 3409 } 3410 AvailExpr; 3411 3412 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 ) 3413 { 3414 if (LIKELY(a1->tag != a2->tag)) 3415 return False; 3416 switch (a1->tag) { 3417 case Ut: 3418 return toBool( 3419 a1->u.Ut.op == a2->u.Ut.op 3420 && a1->u.Ut.arg == a2->u.Ut.arg); 3421 case Btt: 3422 return toBool( 3423 a1->u.Btt.op == a2->u.Btt.op 3424 && a1->u.Btt.arg1 == a2->u.Btt.arg1 3425 && a1->u.Btt.arg2 == a2->u.Btt.arg2); 3426 case Btc: 3427 return toBool( 3428 a1->u.Btc.op == a2->u.Btc.op 3429 && a1->u.Btc.arg1 == a2->u.Btc.arg1 3430 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2)); 3431 case Bct: 3432 return toBool( 3433 a1->u.Bct.op == a2->u.Bct.op 3434 && a1->u.Bct.arg2 == a2->u.Bct.arg2 3435 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1)); 3436 case Cf64i: 3437 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i); 3438 case Ittt: 3439 return toBool(a1->u.Ittt.co == a2->u.Ittt.co 3440 && a1->u.Ittt.e1 == a2->u.Ittt.e1 3441 && a1->u.Ittt.e0 == a2->u.Ittt.e0); 3442 case Ittc: 3443 return toBool(a1->u.Ittc.co == a2->u.Ittc.co 3444 && a1->u.Ittc.e1 == a2->u.Ittc.e1 3445 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0)); 3446 case Itct: 3447 return toBool(a1->u.Itct.co == a2->u.Itct.co 3448 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1) 3449 && a1->u.Itct.e0 == a2->u.Itct.e0); 3450 case Itcc: 3451 return toBool(a1->u.Itcc.co == a2->u.Itcc.co 3452 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1) 3453 && 
eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0)); 3454 case GetIt: 3455 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 3456 && a1->u.GetIt.ix == a2->u.GetIt.ix 3457 && a1->u.GetIt.bias == a2->u.GetIt.bias); 3458 case CCall: { 3459 Int i, n; 3460 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs 3461 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee); 3462 if (eq) { 3463 n = a1->u.CCall.nArgs; 3464 for (i = 0; i < n; i++) { 3465 if (!eqTmpOrConst( &a1->u.CCall.args[i], 3466 &a2->u.CCall.args[i] )) { 3467 eq = False; 3468 break; 3469 } 3470 } 3471 } 3472 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty); 3473 return eq; 3474 } 3475 default: vpanic("eq_AvailExpr"); 3476 } 3477 } 3478 3479 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 3480 { 3481 IRConst *con, *con0, *con1; 3482 switch (ae->tag) { 3483 case Ut: 3484 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) ); 3485 case Btt: 3486 return IRExpr_Binop( ae->u.Btt.op, 3487 IRExpr_RdTmp(ae->u.Btt.arg1), 3488 IRExpr_RdTmp(ae->u.Btt.arg2) ); 3489 case Btc: 3490 con = LibVEX_Alloc(sizeof(IRConst)); 3491 *con = ae->u.Btc.con2; 3492 return IRExpr_Binop( ae->u.Btc.op, 3493 IRExpr_RdTmp(ae->u.Btc.arg1), 3494 IRExpr_Const(con) ); 3495 case Bct: 3496 con = LibVEX_Alloc(sizeof(IRConst)); 3497 *con = ae->u.Bct.con1; 3498 return IRExpr_Binop( ae->u.Bct.op, 3499 IRExpr_Const(con), 3500 IRExpr_RdTmp(ae->u.Bct.arg2) ); 3501 case Cf64i: 3502 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i)); 3503 case Ittt: 3504 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co), 3505 IRExpr_RdTmp(ae->u.Ittt.e1), 3506 IRExpr_RdTmp(ae->u.Ittt.e0)); 3507 case Ittc: 3508 con0 = LibVEX_Alloc(sizeof(IRConst)); 3509 *con0 = ae->u.Ittc.con0; 3510 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co), 3511 IRExpr_RdTmp(ae->u.Ittc.e1), 3512 IRExpr_Const(con0)); 3513 case Itct: 3514 con1 = LibVEX_Alloc(sizeof(IRConst)); 3515 *con1 = ae->u.Itct.con1; 3516 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co), 3517 IRExpr_Const(con1), 3518 IRExpr_RdTmp(ae->u.Itct.e0)); 3519 3520 case Itcc: 3521 con0 = LibVEX_Alloc(sizeof(IRConst)); 3522 con1 = LibVEX_Alloc(sizeof(IRConst)); 3523 *con0 = ae->u.Itcc.con0; 3524 *con1 = ae->u.Itcc.con1; 3525 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co), 3526 IRExpr_Const(con1), 3527 IRExpr_Const(con0)); 3528 case GetIt: 3529 return IRExpr_GetI(ae->u.GetIt.descr, 3530 IRExpr_RdTmp(ae->u.GetIt.ix), 3531 ae->u.GetIt.bias); 3532 case CCall: { 3533 Int i, n = ae->u.CCall.nArgs; 3534 vassert(n >= 0); 3535 IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*)); 3536 vec[n] = NULL; 3537 for (i = 0; i < n; i++) { 3538 TmpOrConst* tc = &ae->u.CCall.args[i]; 3539 if (tc->tag == TCc) { 3540 vec[i] = IRExpr_Const(tc->u.con); 3541 } 3542 else if (tc->tag == TCt) { 3543 vec[i] = IRExpr_RdTmp(tc->u.tmp); 3544 } 3545 else vpanic("availExpr_to_IRExpr:CCall-arg"); 3546 } 3547 return IRExpr_CCall(ae->u.CCall.cee, 3548 ae->u.CCall.retty, 3549 vec); 3550 } 3551 default: 3552 vpanic("availExpr_to_IRExpr"); 3553 } 3554 } 3555 3556 inline 3557 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp ) 3558 { 3559 HWord res; 3560 /* env :: IRTemp -> IRTemp */ 3561 if (lookupHHW( env, &res, (HWord)tmp )) 3562 return (IRTemp)res; 3563 else 3564 return tmp; 3565 } 3566 3567 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae ) 3568 { 3569 /* env :: IRTemp -> IRTemp */ 3570 switch (ae->tag) { 3571 case Ut: 3572 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg ); 3573 break; 3574 case Btt: 3575 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 ); 
3576 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 ); 3577 break; 3578 case Btc: 3579 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 ); 3580 break; 3581 case Bct: 3582 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 ); 3583 break; 3584 case Cf64i: 3585 break; 3586 case Ittt: 3587 ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co ); 3588 ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 ); 3589 ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 ); 3590 break; 3591 case Ittc: 3592 ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co ); 3593 ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 ); 3594 break; 3595 case Itct: 3596 ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co ); 3597 ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 ); 3598 break; 3599 case Itcc: 3600 ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co ); 3601 break; 3602 case GetIt: 3603 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix ); 3604 break; 3605 case CCall: { 3606 Int i, n = ae->u.CCall.nArgs; 3607 for (i = 0; i < n; i++) { 3608 TmpOrConst* tc = &ae->u.CCall.args[i]; 3609 if (tc->tag == TCt) { 3610 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp ); 3611 } 3612 } 3613 break; 3614 } 3615 default: 3616 vpanic("subst_AvailExpr"); 3617 } 3618 } 3619 3620 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e ) 3621 { 3622 AvailExpr* ae; 3623 3624 switch (e->tag) { 3625 case Iex_Unop: 3626 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 3627 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3628 ae->tag = Ut; 3629 ae->u.Ut.op = e->Iex.Unop.op; 3630 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp; 3631 return ae; 3632 } 3633 break; 3634 3635 case Iex_Binop: 3636 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) { 3637 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3638 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3639 ae->tag = Btt; 3640 ae->u.Btt.op = e->Iex.Binop.op; 3641 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3642 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3643 return ae; 3644 } 3645 if (e->Iex.Binop.arg2->tag == Iex_Const) { 3646 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3647 ae->tag = Btc; 3648 ae->u.Btc.op = e->Iex.Binop.op; 3649 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3650 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con); 3651 return ae; 3652 } 3653 } else if (e->Iex.Binop.arg1->tag == Iex_Const 3654 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3655 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3656 ae->tag = Bct; 3657 ae->u.Bct.op = e->Iex.Binop.op; 3658 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3659 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 3660 return ae; 3661 } 3662 break; 3663 3664 case Iex_Const: 3665 if (e->Iex.Const.con->tag == Ico_F64i) { 3666 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3667 ae->tag = Cf64i; 3668 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 3669 return ae; 3670 } 3671 break; 3672 3673 case Iex_ITE: 3674 if (e->Iex.ITE.cond->tag == Iex_RdTmp) { 3675 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) { 3676 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3677 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3678 ae->tag = Ittt; 3679 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3680 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3681 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3682 return ae; 3683 } 3684 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3685 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3686 ae->tag = Itct; 3687 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3688 ae->u.Itct.con1 =
*(e->Iex.ITE.iftrue->Iex.Const.con); 3689 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3690 return ae; 3691 } 3692 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) { 3693 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3694 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3695 ae->tag = Ittc; 3696 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3697 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3698 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3699 return ae; 3700 } 3701 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3702 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3703 ae->tag = Itcc; 3704 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3705 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3706 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3707 return ae; 3708 } 3709 } 3710 } 3711 break; 3712 3713 case Iex_GetI: 3714 if (e->Iex.GetI.ix->tag == Iex_RdTmp) { 3715 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3716 ae->tag = GetIt; 3717 ae->u.GetIt.descr = e->Iex.GetI.descr; 3718 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 3719 ae->u.GetIt.bias = e->Iex.GetI.bias; 3720 return ae; 3721 } 3722 break; 3723 3724 case Iex_CCall: 3725 ae = LibVEX_Alloc(sizeof(AvailExpr)); 3726 ae->tag = CCall; 3727 /* Ok to share only the cee, since it is immutable. */ 3728 ae->u.CCall.cee = e->Iex.CCall.cee; 3729 ae->u.CCall.retty = e->Iex.CCall.retty; 3730 /* irExprVec_to_TmpOrConsts will assert if the args are 3731 neither tmps nor constants, but that's ok .. that's all they 3732 should be. */ 3733 irExprVec_to_TmpOrConsts( 3734 &ae->u.CCall.args, &ae->u.CCall.nArgs, 3735 e->Iex.CCall.args 3736 ); 3737 return ae; 3738 3739 default: 3740 break; 3741 } 3742 3743 return NULL; 3744 } 3745 3746 3747 /* The BB is modified in-place. Returns True if any changes were 3748 made. */ 3749 3750 static Bool do_cse_BB ( IRSB* bb ) 3751 { 3752 Int i, j, paranoia; 3753 IRTemp t, q; 3754 IRStmt* st; 3755 AvailExpr* eprime; 3756 AvailExpr* ae; 3757 Bool invalidate; 3758 Bool anyDone = False; 3759 3760 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */ 3761 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */ 3762 3763 vassert(sizeof(IRTemp) <= sizeof(HWord)); 3764 3765 if (0) { ppIRSB(bb); vex_printf("\n\n"); } 3766 3767 /* Iterate forwards over the stmts. 3768 On seeing "t = E", where E is one of the AvailExpr forms: 3769 let E' = apply tenv substitution to E 3770 search aenv for E' 3771 if a mapping E' -> q is found, 3772 replace this stmt by "t = q" 3773 and add binding t -> q to tenv 3774 else 3775 add binding E' -> t to aenv 3776 replace this stmt by "t = E'" 3777 3778 Other statements are only interesting to the extent that they 3779 might invalidate some of the expressions in aenv. So there is 3780 an invalidate-bindings check for each statement seen. 3781 */ 3782 for (i = 0; i < bb->stmts_used; i++) { 3783 st = bb->stmts[i]; 3784 3785 /* ------ BEGIN invalidate aenv bindings ------ */ 3786 /* This is critical: remove from aenv any E' -> .. bindings 3787 which might be invalidated by this statement. The only 3788 vulnerable kind of binding is the GetI kind. 3789 Dirty call - dump (paranoia level -> 2) 3790 Store - dump (ditto) 3791 Put, PutI - dump unless no-overlap is proven (.. -> 1) 3792 Uses getAliasingRelation_IC and getAliasingRelation_II 3793 to do the no-overlap assessments needed for Put/PutI.
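For example (illustrative), a binding 'GetI(descr,t5,3) -> t7' must be dumped when a Put whose byte range overlaps descr's array is seen, since the Put may change the value that the GetI would read.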
3794    */
3795       switch (st->tag) {
3796          case Ist_Dirty: case Ist_Store: case Ist_MBE:
3797          case Ist_CAS: case Ist_LLSC:
3798          case Ist_StoreG:
3799             paranoia = 2; break;
3800          case Ist_Put: case Ist_PutI:
3801             paranoia = 1; break;
3802          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3803          case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3804             paranoia = 0; break;
3805          default:
3806             vpanic("do_cse_BB(1)");
3807       }
3808
3809       if (paranoia > 0) {
3810          for (j = 0; j < aenv->used; j++) {
3811             if (!aenv->inuse[j])
3812                continue;
3813             ae = (AvailExpr*)aenv->key[j];
3814             if (ae->tag != GetIt)
3815                continue;
3816             invalidate = False;
3817             if (paranoia >= 2) {
3818                invalidate = True;
3819             } else {
3820                vassert(paranoia == 1);
3821                if (st->tag == Ist_Put) {
3822                   if (getAliasingRelation_IC(
3823                          ae->u.GetIt.descr,
3824                          IRExpr_RdTmp(ae->u.GetIt.ix),
3825                          st->Ist.Put.offset,
3826                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3827                       ) != NoAlias)
3828                      invalidate = True;
3829                }
3830                else
3831                if (st->tag == Ist_PutI) {
3832                   IRPutI *puti = st->Ist.PutI.details;
3833                   if (getAliasingRelation_II(
3834                          ae->u.GetIt.descr,
3835                          IRExpr_RdTmp(ae->u.GetIt.ix),
3836                          ae->u.GetIt.bias,
3837                          puti->descr,
3838                          puti->ix,
3839                          puti->bias
3840                       ) != NoAlias)
3841                      invalidate = True;
3842                }
3843                else
3844                   vpanic("do_cse_BB(2)");
3845             }
3846
3847             if (invalidate) {
3848                aenv->inuse[j] = False;
3849                aenv->key[j] = (HWord)NULL; /* be sure */
3850             }
3851          } /* for j */
3852       } /* paranoia > 0 */
3853
3854       /* ------ END invalidate aenv bindings ------ */
3855
3856       /* ignore not-interestings */
3857       if (st->tag != Ist_WrTmp)
3858          continue;
3859
3860       t = st->Ist.WrTmp.tmp;
3861       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
3862       /* ignore if not of AvailExpr form */
3863       if (!eprime)
3864          continue;
3865
3866       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3867
3868       /* apply tenv */
3869       subst_AvailExpr( tenv, eprime );
3870
3871       /* search aenv for eprime, unfortunately the hard way */
3872       for (j = 0; j < aenv->used; j++)
3873          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
3874             break;
3875
3876       if (j < aenv->used) {
3877          /* A binding E' -> q was found.  Replace stmt by "t = q" and
3878             note the t->q binding in tenv. */
3879          /* (this is the core of the CSE action) */
3880          q = (IRTemp)aenv->val[j];
3881          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
3882          addToHHW( tenv, (HWord)t, (HWord)q );
3883          anyDone = True;
3884       } else {
3885          /* No binding was found, so instead we add E' -> t to our
3886             collection of available expressions, replace this stmt
3887             with "t = E'", and move on. */
3888          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
3889          addToHHW( aenv, (HWord)eprime, (HWord)t );
3890       }
3891    }
3892
3893    /*
3894      ppIRSB(bb);
3895      sanityCheckIRSB(bb, Ity_I32);
3896      vex_printf("\n\n");
3897    */
3898    return anyDone;
3899 }
3900
3901
3902 /*---------------------------------------------------------------*/
3903 /*--- Add32/Sub32 chain collapsing                            ---*/
3904 /*---------------------------------------------------------------*/
3905
3906 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
3907
3908 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
3909    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
3910    root node is Add32, not Sub32.
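   E.g. for Add32(t7,9), *tmp is set to t7 and *i32 to 9; for
   Sub32(t7,9), *tmp is t7 and *i32 is -9.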
*/
3911
3912 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
3913 {
3914    if (e->tag != Iex_Binop)
3915       return False;
3916    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
3917       return False;
3918    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
3919       return False;
3920    if (e->Iex.Binop.arg2->tag != Iex_Const)
3921       return False;
3922    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3923    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
3924    if (e->Iex.Binop.op == Iop_Sub32)
3925       *i32 = -*i32;
3926    return True;
3927 }
3928
3929
3930 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
3931    other tmp2.  Scan backwards from the specified start point -- an
3932    optimisation. */
3933
3934 static Bool collapseChain ( IRSB* bb, Int startHere,
3935                             IRTemp tmp,
3936                             IRTemp* tmp2, Int* i32 )
3937 {
3938    Int     j, ii;
3939    IRTemp  vv;
3940    IRStmt* st;
3941    IRExpr* e;
3942
3943    /* the (var, con) pair contains the current 'representation' for
3944       'tmp'.  We start with 'tmp + 0'. */
3945    IRTemp var = tmp;
3946    Int    con = 0;
3947
3948    /* Scan backwards to see if tmp can be replaced by some other tmp
3949       +/- a constant. */
3950    for (j = startHere; j >= 0; j--) {
3951       st = bb->stmts[j];
3952       if (st->tag != Ist_WrTmp)
3953          continue;
3954       if (st->Ist.WrTmp.tmp != var)
3955          continue;
3956       e = st->Ist.WrTmp.data;
3957       if (!isAdd32OrSub32(e, &vv, &ii))
3958          break;
3959       var = vv;
3960       con += ii;
3961    }
3962    if (j == -1)
3963       /* no earlier binding for var .. ill-formed IR */
3964       vpanic("collapseChain");
3965
3966    /* so, did we find anything interesting? */
3967    if (var == tmp)
3968       return False; /* no .. */
3969
3970    *tmp2 = var;
3971    *i32  = con;
3972    return True;
3973 }
3974
3975
3976 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
3977
3978 static void collapse_AddSub_chains_BB ( IRSB* bb )
3979 {
3980    IRStmt *st;
3981    IRTemp var, var2;
3982    Int    i, con, con2;
3983
3984    for (i = bb->stmts_used-1; i >= 0; i--) {
3985       st = bb->stmts[i];
3986       if (st->tag == Ist_NoOp)
3987          continue;
3988
3989       /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
3990
3991       if (st->tag == Ist_WrTmp
3992           && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
3993
3994          /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
3995             Find out if var can be expressed as var2 + con2. */
3996          if (collapseChain(bb, i-1, var, &var2, &con2)) {
3997             if (DEBUG_IROPT) {
3998                vex_printf("replacing1 ");
3999                ppIRStmt(st);
4000                vex_printf(" with ");
4001             }
4002             con2 += con;
4003             bb->stmts[i]
4004                = IRStmt_WrTmp(
4005                     st->Ist.WrTmp.tmp,
4006                     (con2 >= 0)
4007                        ? IRExpr_Binop(Iop_Add32,
4008                                       IRExpr_RdTmp(var2),
4009                                       IRExpr_Const(IRConst_U32(con2)))
4010                        : IRExpr_Binop(Iop_Sub32,
4011                                       IRExpr_RdTmp(var2),
4012                                       IRExpr_Const(IRConst_U32(-con2)))
4013                  );
4014             if (DEBUG_IROPT) {
4015                ppIRStmt(bb->stmts[i]);
4016                vex_printf("\n");
4017             }
4018          }
4019
4020          continue;
4021       }
4022
4023       /* Try to collapse 't1 = GetI[t2, con]'.
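         That is, if t2 can be shown to equal t3 +32 k for some other
         tmp t3, re-express the GetI off t3, folding k into the bias.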
*/ 4024 4025 if (st->tag == Ist_WrTmp 4026 && st->Ist.WrTmp.data->tag == Iex_GetI 4027 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp 4028 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix 4029 ->Iex.RdTmp.tmp, &var2, &con2)) { 4030 if (DEBUG_IROPT) { 4031 vex_printf("replacing3 "); 4032 ppIRStmt(st); 4033 vex_printf(" with "); 4034 } 4035 con2 += st->Ist.WrTmp.data->Iex.GetI.bias; 4036 bb->stmts[i] 4037 = IRStmt_WrTmp( 4038 st->Ist.WrTmp.tmp, 4039 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr, 4040 IRExpr_RdTmp(var2), 4041 con2)); 4042 if (DEBUG_IROPT) { 4043 ppIRStmt(bb->stmts[i]); 4044 vex_printf("\n"); 4045 } 4046 continue; 4047 } 4048 4049 /* Perhaps st is PutI[t, con] ? */ 4050 IRPutI *puti = st->Ist.PutI.details; 4051 if (st->tag == Ist_PutI 4052 && puti->ix->tag == Iex_RdTmp 4053 && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp, 4054 &var2, &con2)) { 4055 if (DEBUG_IROPT) { 4056 vex_printf("replacing2 "); 4057 ppIRStmt(st); 4058 vex_printf(" with "); 4059 } 4060 con2 += puti->bias; 4061 bb->stmts[i] 4062 = IRStmt_PutI(mkIRPutI(puti->descr, 4063 IRExpr_RdTmp(var2), 4064 con2, 4065 puti->data)); 4066 if (DEBUG_IROPT) { 4067 ppIRStmt(bb->stmts[i]); 4068 vex_printf("\n"); 4069 } 4070 continue; 4071 } 4072 4073 } /* for */ 4074 } 4075 4076 4077 /*---------------------------------------------------------------*/ 4078 /*--- PutI/GetI transformations ---*/ 4079 /*---------------------------------------------------------------*/ 4080 4081 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from 4082 the given starting point to find, if any, a PutI which writes 4083 exactly the same piece of guest state, and so return the expression 4084 that the PutI writes. This is the core of PutI-GetI forwarding. */ 4085 4086 static 4087 IRExpr* findPutI ( IRSB* bb, Int startHere, 4088 IRRegArray* descrG, IRExpr* ixG, Int biasG ) 4089 { 4090 Int j; 4091 IRStmt* st; 4092 GSAliasing relation; 4093 4094 if (0) { 4095 vex_printf("\nfindPutI "); 4096 ppIRRegArray(descrG); 4097 vex_printf(" "); 4098 ppIRExpr(ixG); 4099 vex_printf(" %d\n", biasG); 4100 } 4101 4102 /* Scan backwards in bb from startHere to find a suitable PutI 4103 binding for (descrG, ixG, biasG), if any. */ 4104 4105 for (j = startHere; j >= 0; j--) { 4106 st = bb->stmts[j]; 4107 if (st->tag == Ist_NoOp) 4108 continue; 4109 4110 if (st->tag == Ist_Put) { 4111 /* Non-indexed Put. This can't give a binding, but we do 4112 need to check it doesn't invalidate the search by 4113 overlapping any part of the indexed guest state. */ 4114 4115 relation 4116 = getAliasingRelation_IC( 4117 descrG, ixG, 4118 st->Ist.Put.offset, 4119 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 4120 4121 if (relation == NoAlias) { 4122 /* we're OK; keep going */ 4123 continue; 4124 } else { 4125 /* relation == UnknownAlias || relation == ExactAlias */ 4126 /* If this assertion fails, we've found a Put which writes 4127 an area of guest state which is read by a GetI. Which 4128 is unlikely (although not per se wrong). */ 4129 vassert(relation != ExactAlias); 4130 /* This Put potentially writes guest state that the GetI 4131 reads; we must fail. */ 4132 return NULL; 4133 } 4134 } 4135 4136 if (st->tag == Ist_PutI) { 4137 IRPutI *puti = st->Ist.PutI.details; 4138 4139 relation = getAliasingRelation_II( 4140 descrG, ixG, biasG, 4141 puti->descr, 4142 puti->ix, 4143 puti->bias 4144 ); 4145 4146 if (relation == NoAlias) { 4147 /* This PutI definitely doesn't overlap. Ignore it and 4148 keep going. 
*/
4149             continue; /* the for j loop */
4150          }
4151
4152          if (relation == UnknownAlias) {
4153             /* We don't know if this PutI writes to the same guest
4154                state that the GetI reads, or not.  So we have to give up. */
4155             return NULL;
4156          }
4157
4158          /* Otherwise, we've found what we're looking for. */
4159          vassert(relation == ExactAlias);
4160          return puti->data;
4161
4162       } /* if (st->tag == Ist_PutI) */
4163
4164       if (st->tag == Ist_Dirty) {
4165          /* Be conservative.  If the dirty call has any guest effects at
4166             all, give up.  We could do better -- only give up if there
4167             are any guest writes/modifies. */
4168          if (st->Ist.Dirty.details->nFxState > 0)
4169             return NULL;
4170       }
4171
4172    } /* for */
4173
4174    /* No valid replacement was found. */
4175    return NULL;
4176 }
4177
4178
4179
4180 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4181    that it writes exactly the same piece of guest state) ?  Safe
4182    answer: False. */
4183
4184 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4185 {
4186    vassert(pi->tag == Ist_PutI);
4187    if (s2->tag != Ist_PutI)
4188       return False;
4189
4190    IRPutI *p1 = pi->Ist.PutI.details;
4191    IRPutI *p2 = s2->Ist.PutI.details;
4192
4193    return toBool(
4194           getAliasingRelation_II(
4195              p1->descr, p1->ix, p1->bias,
4196              p2->descr, p2->ix, p2->bias
4197           )
4198           == ExactAlias
4199       );
4200 }
4201
4202
4203 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4204    overlap it?  Safe answer: True.  Note, we could do a lot better
4205    than this if needed. */
4206
4207 static
4208 Bool guestAccessWhichMightOverlapPutI (
4209         IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4210      )
4211 {
4212    GSAliasing relation;
4213    UInt       minoffP, maxoffP;
4214
4215    vassert(pi->tag == Ist_PutI);
4216
4217    IRPutI *p1 = pi->Ist.PutI.details;
4218
4219    getArrayBounds(p1->descr, &minoffP, &maxoffP);
4220    switch (s2->tag) {
4221
4222       case Ist_NoOp:
4223       case Ist_IMark:
4224          return False;
4225
4226       case Ist_MBE:
4227       case Ist_AbiHint:
4228          /* just be paranoid ... these should be rare. */
4229          return True;
4230
4231       case Ist_CAS:
4232          /* This is unbelievably lame, but it's probably not
4233             significant from a performance point of view.  Really, a
4234             CAS is a load-store op, so it should be safe to say False.
4235             However .. */
4236          return True;
4237
4238       case Ist_Dirty:
4239          /* If the dirty call has any guest effects at all, give up.
4240             Probably could do better.
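            -- e.g. by ignoring fxState entries marked Ifx_Read and
            testing only written/modified state against p1.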
*/
4241          if (s2->Ist.Dirty.details->nFxState > 0)
4242             return True;
4243          return False;
4244
4245       case Ist_Put:
4246          vassert(isIRAtom(s2->Ist.Put.data));
4247          relation
4248             = getAliasingRelation_IC(
4249                  p1->descr, p1->ix,
4250                  s2->Ist.Put.offset,
4251                  typeOfIRExpr(tyenv,s2->Ist.Put.data)
4252               );
4253          goto have_relation;
4254
4255       case Ist_PutI: {
4256          IRPutI *p2 = s2->Ist.PutI.details;
4257
4258          vassert(isIRAtom(p2->ix));
4259          vassert(isIRAtom(p2->data));
4260          relation
4261             = getAliasingRelation_II(
4262                  p1->descr, p1->ix, p1->bias,
4263                  p2->descr, p2->ix, p2->bias
4264               );
4265          goto have_relation;
4266       }
4267
4268       case Ist_WrTmp:
4269          if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4270             relation
4271                = getAliasingRelation_II(
4272                     p1->descr, p1->ix, p1->bias,
4273                     s2->Ist.WrTmp.data->Iex.GetI.descr,
4274                     s2->Ist.WrTmp.data->Iex.GetI.ix,
4275                     s2->Ist.WrTmp.data->Iex.GetI.bias
4276                  );
4277             goto have_relation;
4278          }
4279          if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4280             relation
4281                = getAliasingRelation_IC(
4282                     p1->descr, p1->ix,
4283                     s2->Ist.WrTmp.data->Iex.Get.offset,
4284                     s2->Ist.WrTmp.data->Iex.Get.ty
4285                  );
4286             goto have_relation;
4287          }
4288          return False;
4289
4290       case Ist_Store:
4291          vassert(isIRAtom(s2->Ist.Store.addr));
4292          vassert(isIRAtom(s2->Ist.Store.data));
4293          return False;
4294
4295       default:
4296          vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4297          vpanic("guestAccessWhichMightOverlapPutI");
4298    }
4299
4300   have_relation:
4301    if (relation == NoAlias)
4302       return False;
4303    else
4304       return True; /* ExactAlias or UnknownAlias */
4305 }
4306
4307
4308
4309 /* ---------- PutI/GetI transformations main functions --------- */
4310
4311 /* Remove redundant GetIs, to the extent that they can be detected.
4312    bb is modified in-place. */
4313
4314 static
4315 void do_redundant_GetI_elimination ( IRSB* bb )
4316 {
4317    Int     i;
4318    IRStmt* st;
4319
4320    for (i = bb->stmts_used-1; i >= 0; i--) {
4321       st = bb->stmts[i];
4322       if (st->tag == Ist_NoOp)
4323          continue;
4324
4325       if (st->tag == Ist_WrTmp
4326           && st->Ist.WrTmp.data->tag == Iex_GetI
4327           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4328          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4329          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
4330          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
4331          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
4332          if (replacement
4333              && isIRAtom(replacement)
4334              /* Make sure we're doing a type-safe transformation! */
4335              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4336             if (DEBUG_IROPT) {
4337                vex_printf("rGI:  ");
4338                ppIRExpr(st->Ist.WrTmp.data);
4339                vex_printf(" -> ");
4340                ppIRExpr(replacement);
4341                vex_printf("\n");
4342             }
4343             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4344          }
4345       }
4346    }
4347
4348 }
4349
4350
4351 /* Remove redundant PutIs, to the extent that they can be detected.
4352    bb is modified in-place. */
4353
4354 static
4355 void do_redundant_PutI_elimination ( IRSB* bb )
4356 {
4357    Int    i, j;
4358    Bool   delete;
4359    IRStmt *st, *stj;
4360
4361    vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);
4362
4363    for (i = 0; i < bb->stmts_used; i++) {
4364       st = bb->stmts[i];
4365       if (st->tag != Ist_PutI)
4366          continue;
4367       /* Ok, search forwards from here to see if we can find another
4368          PutI which makes this one redundant, while dodging various
4369          hazards.  Search forwards:
4370          * If conditional exit, give up (because anything after that
4371            does not postdominate this put).
4372          * If a Get which might overlap, give up (because this PutI is
4373            then not necessarily dead).
4374          * If a Put which is identical, stop with success.
4375          * If a Put which might overlap, but is not identical, give up.
4376          * If a dirty helper call which might write guest state, give up.
4377          * If a Put which definitely doesn't overlap, or any other
4378            kind of stmt, continue.
4379       */
4380       delete = False;
4381       for (j = i+1; j < bb->stmts_used; j++) {
4382          stj = bb->stmts[j];
4383          if (stj->tag == Ist_NoOp)
4384             continue;
4385          if (identicalPutIs(st, stj)) {
4386             /* success! */
4387             delete = True;
4388             break;
4389          }
4390          if (stj->tag == Ist_Exit)
4391             /* give up */
4392             break;
4393          if (stj->tag == Ist_Dirty)
4394             /* give up; could do better here */
4395             break;
4396          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4397             /* give up */
4398             break;
4399       }
4400
4401       if (delete) {
4402          if (DEBUG_IROPT) {
4403             vex_printf("rPI:  ");
4404             ppIRStmt(st);
4405             vex_printf("\n");
4406          }
4407          bb->stmts[i] = IRStmt_NoOp();
4408       }
4409
4410    }
4411 }
4412
4413
4414 /*---------------------------------------------------------------*/
4415 /*--- Loop unrolling                                          ---*/
4416 /*---------------------------------------------------------------*/
4417
4418 /* Adjust all tmp values (names) in e by delta.  e is destructively
4419    modified. */
4420
4421 static void deltaIRExpr ( IRExpr* e, Int delta )
4422 {
4423    Int i;
4424    switch (e->tag) {
4425       case Iex_RdTmp:
4426          e->Iex.RdTmp.tmp += delta;
4427          break;
4428       case Iex_Get:
4429       case Iex_Const:
4430          break;
4431       case Iex_GetI:
4432          deltaIRExpr(e->Iex.GetI.ix, delta);
4433          break;
4434       case Iex_Qop:
4435          deltaIRExpr(e->Iex.Qop.details->arg1, delta);
4436          deltaIRExpr(e->Iex.Qop.details->arg2, delta);
4437          deltaIRExpr(e->Iex.Qop.details->arg3, delta);
4438          deltaIRExpr(e->Iex.Qop.details->arg4, delta);
4439          break;
4440       case Iex_Triop:
4441          deltaIRExpr(e->Iex.Triop.details->arg1, delta);
4442          deltaIRExpr(e->Iex.Triop.details->arg2, delta);
4443          deltaIRExpr(e->Iex.Triop.details->arg3, delta);
4444          break;
4445       case Iex_Binop:
4446          deltaIRExpr(e->Iex.Binop.arg1, delta);
4447          deltaIRExpr(e->Iex.Binop.arg2, delta);
4448          break;
4449       case Iex_Unop:
4450          deltaIRExpr(e->Iex.Unop.arg, delta);
4451          break;
4452       case Iex_Load:
4453          deltaIRExpr(e->Iex.Load.addr, delta);
4454          break;
4455       case Iex_CCall:
4456          for (i = 0; e->Iex.CCall.args[i]; i++)
4457             deltaIRExpr(e->Iex.CCall.args[i], delta);
4458          break;
4459       case Iex_ITE:
4460          deltaIRExpr(e->Iex.ITE.cond, delta);
4461          deltaIRExpr(e->Iex.ITE.iftrue, delta);
4462          deltaIRExpr(e->Iex.ITE.iffalse, delta);
4463          break;
4464       default:
4465          vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4466          vpanic("deltaIRExpr");
4467    }
4468 }
4469
4470 /* Adjust all tmp values (names) in st by delta.  st is destructively
4471    modified.
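   This is what lets the loop unroller work: each unrolled copy of the
   block gets a disjoint set of temps, delta being the number of temps
   already allocated in the tyenv.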
*/ 4472 4473 static void deltaIRStmt ( IRStmt* st, Int delta ) 4474 { 4475 Int i; 4476 IRDirty* d; 4477 switch (st->tag) { 4478 case Ist_NoOp: 4479 case Ist_IMark: 4480 case Ist_MBE: 4481 break; 4482 case Ist_AbiHint: 4483 deltaIRExpr(st->Ist.AbiHint.base, delta); 4484 deltaIRExpr(st->Ist.AbiHint.nia, delta); 4485 break; 4486 case Ist_Put: 4487 deltaIRExpr(st->Ist.Put.data, delta); 4488 break; 4489 case Ist_PutI: 4490 deltaIRExpr(st->Ist.PutI.details->ix, delta); 4491 deltaIRExpr(st->Ist.PutI.details->data, delta); 4492 break; 4493 case Ist_WrTmp: 4494 st->Ist.WrTmp.tmp += delta; 4495 deltaIRExpr(st->Ist.WrTmp.data, delta); 4496 break; 4497 case Ist_Exit: 4498 deltaIRExpr(st->Ist.Exit.guard, delta); 4499 break; 4500 case Ist_Store: 4501 deltaIRExpr(st->Ist.Store.addr, delta); 4502 deltaIRExpr(st->Ist.Store.data, delta); 4503 break; 4504 case Ist_StoreG: { 4505 IRStoreG* sg = st->Ist.StoreG.details; 4506 deltaIRExpr(sg->addr, delta); 4507 deltaIRExpr(sg->data, delta); 4508 deltaIRExpr(sg->guard, delta); 4509 break; 4510 } 4511 case Ist_LoadG: { 4512 IRLoadG* lg = st->Ist.LoadG.details; 4513 lg->dst += delta; 4514 deltaIRExpr(lg->addr, delta); 4515 deltaIRExpr(lg->alt, delta); 4516 deltaIRExpr(lg->guard, delta); 4517 break; 4518 } 4519 case Ist_CAS: 4520 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID) 4521 st->Ist.CAS.details->oldHi += delta; 4522 st->Ist.CAS.details->oldLo += delta; 4523 deltaIRExpr(st->Ist.CAS.details->addr, delta); 4524 if (st->Ist.CAS.details->expdHi) 4525 deltaIRExpr(st->Ist.CAS.details->expdHi, delta); 4526 deltaIRExpr(st->Ist.CAS.details->expdLo, delta); 4527 if (st->Ist.CAS.details->dataHi) 4528 deltaIRExpr(st->Ist.CAS.details->dataHi, delta); 4529 deltaIRExpr(st->Ist.CAS.details->dataLo, delta); 4530 break; 4531 case Ist_LLSC: 4532 st->Ist.LLSC.result += delta; 4533 deltaIRExpr(st->Ist.LLSC.addr, delta); 4534 if (st->Ist.LLSC.storedata) 4535 deltaIRExpr(st->Ist.LLSC.storedata, delta); 4536 break; 4537 case Ist_Dirty: 4538 d = st->Ist.Dirty.details; 4539 deltaIRExpr(d->guard, delta); 4540 for (i = 0; d->args[i]; i++) { 4541 IRExpr* arg = d->args[i]; 4542 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 4543 deltaIRExpr(arg, delta); 4544 } 4545 if (d->tmp != IRTemp_INVALID) 4546 d->tmp += delta; 4547 if (d->mAddr) 4548 deltaIRExpr(d->mAddr, delta); 4549 break; 4550 default: 4551 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 4552 vpanic("deltaIRStmt"); 4553 } 4554 } 4555 4556 4557 /* If possible, return a loop-unrolled version of bb0. The original 4558 is changed. If not possible, return NULL. */ 4559 4560 /* The two schemas considered are: 4561 4562 X: BODY; goto X 4563 4564 which unrolls to (eg) X: BODY;BODY; goto X 4565 4566 and 4567 4568 X: BODY; if (c) goto X; goto Y 4569 which trivially transforms to 4570 X: BODY; if (!c) goto Y; goto X; 4571 so it falls in the scope of the first case. 4572 4573 X and Y must be literal (guest) addresses. 
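   For example, with unroll factor 2 the first schema becomes
      X: BODY; BODY; goto X
   where the temps in the second BODY are renamed (by deltaIRStmt) so
   the two copies do not clash.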
4574 */
4575
4576 static Int calc_unroll_factor( IRSB* bb )
4577 {
4578    Int n_stmts, i;
4579
4580    n_stmts = 0;
4581    for (i = 0; i < bb->stmts_used; i++) {
4582       if (bb->stmts[i]->tag != Ist_NoOp)
4583          n_stmts++;
4584    }
4585
4586    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4587       if (vex_control.iropt_verbosity > 0)
4588          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4589                     n_stmts, 8* n_stmts);
4590       return 8;
4591    }
4592    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4593       if (vex_control.iropt_verbosity > 0)
4594          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4595                     n_stmts, 4* n_stmts);
4596       return 4;
4597    }
4598
4599    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4600       if (vex_control.iropt_verbosity > 0)
4601          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4602                     n_stmts, 2* n_stmts);
4603       return 2;
4604    }
4605
4606    if (vex_control.iropt_verbosity > 0)
4607       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4608
4609    return 1;
4610 }
4611
4612
4613 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
4614 {
4615    Int      i, j, jmax, n_vars;
4616    Bool     xxx_known;
4617    Addr64   xxx_value, yyy_value;
4618    IRExpr*  udst;
4619    IRStmt*  st;
4620    IRConst* con;
4621    IRSB     *bb1, *bb2;
4622    Int      unroll_factor;
4623
4624    if (vex_control.iropt_unroll_thresh <= 0)
4625       return NULL;
4626
4627    /* First off, figure out if we can unroll this loop.  Do this
4628       without modifying bb0. */
4629
4630    if (bb0->jumpkind != Ijk_Boring)
4631       return NULL;
4632
4633    xxx_known = False;
4634    xxx_value = 0;
4635
4636    /* Extract the next-guest address.  If it isn't a literal, we
4637       have to give up. */
4638
4639    udst = bb0->next;
4640    if (udst->tag == Iex_Const
4641        && (udst->Iex.Const.con->tag == Ico_U32
4642            || udst->Iex.Const.con->tag == Ico_U64)) {
4643       /* The BB ends in a jump to a literal location. */
4644       xxx_known = True;
4645       xxx_value = udst->Iex.Const.con->tag == Ico_U64
4646                     ? udst->Iex.Const.con->Ico.U64
4647                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
4648    }
4649
4650    if (!xxx_known)
4651       return NULL;
4652
4653    /* Now we know the BB ends in a jump to a literal location.  If
4654       it's a jump to itself (viz, idiom #1), move directly to the
4655       unrolling stage, first cloning the bb so the original isn't
4656       modified. */
4657    if (xxx_value == my_addr) {
4658       unroll_factor = calc_unroll_factor( bb0 );
4659       if (unroll_factor < 2)
4660          return NULL;
4661       bb1 = deepCopyIRSB( bb0 );
4662       bb0 = NULL;
4663       udst = NULL; /* is now invalid */
4664       goto do_unroll;
4665    }
4666
4667    /* Search for the second idiomatic form:
4668         X: BODY; if (c) goto X; goto Y
4669       We know Y, but need to establish that the last stmt
4670       is 'if (c) goto X'.
4671    */
4672    yyy_value = xxx_value;
4673    for (i = bb0->stmts_used-1; i >= 0; i--)
4674       if (bb0->stmts[i])
4675          break;
4676
4677    if (i < 0)
4678       return NULL; /* block with no stmts.  Strange. */
4679
4680    st = bb0->stmts[i];
4681    if (st->tag != Ist_Exit)
4682       return NULL;
4683    if (st->Ist.Exit.jk != Ijk_Boring)
4684       return NULL;
4685
4686    con = st->Ist.Exit.dst;
4687    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4688
4689    xxx_value = con->tag == Ico_U64
4690                  ? st->Ist.Exit.dst->Ico.U64
4691                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4692
4693    /* If this assertion fails, we have some kind of type error. */
4694    vassert(con->tag == udst->Iex.Const.con->tag);
4695
4696    if (xxx_value != my_addr)
4697       /* We didn't find either idiom.  Give up. */
4698       return NULL;
4699
4700    /* Ok, we found idiom #2.
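      (The taken Exit edge branches back to X == my_addr, while the
      fall-through bb->next goes to Y.)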
Copy the BB, switch around the xxx and
4701       yyy values (which makes it look like idiom #1), and go into
4702       unrolling proper.  This means finding (again) the last stmt in
4703       the copied BB. */
4704
4705    unroll_factor = calc_unroll_factor( bb0 );
4706    if (unroll_factor < 2)
4707       return NULL;
4708
4709    bb1 = deepCopyIRSB( bb0 );
4710    bb0 = NULL;
4711    udst = NULL; /* is now invalid */
4712    for (i = bb1->stmts_used-1; i >= 0; i--)
4713       if (bb1->stmts[i])
4714          break;
4715
4716    /* The next bunch of assertions should be true since we already
4717       found and checked the last stmt in the original bb. */
4718
4719    vassert(i >= 0);
4720
4721    st = bb1->stmts[i];
4722    vassert(st->tag == Ist_Exit);
4723
4724    con = st->Ist.Exit.dst;
4725    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4726
4727    udst = bb1->next;
4728    vassert(udst->tag == Iex_Const);
4729    vassert(udst->Iex.Const.con->tag == Ico_U32
4730            || udst->Iex.Const.con->tag == Ico_U64);
4731    vassert(con->tag == udst->Iex.Const.con->tag);
4732
4733    /* switch the xxx and yyy fields around */
4734    if (con->tag == Ico_U64) {
4735       udst->Iex.Const.con->Ico.U64 = xxx_value;
4736       con->Ico.U64 = yyy_value;
4737    } else {
4738       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
4739       con->Ico.U32 = (UInt)yyy_value;
4740    }
4741
4742    /* negate the test condition */
4743    st->Ist.Exit.guard
4744       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
4745
4746    /* --- The unroller proper.  Both idioms have by now --- */
4747    /* --- been converted to idiom 1.                    --- */
4748
4749   do_unroll:
4750
4751    vassert(unroll_factor == 2
4752            || unroll_factor == 4
4753            || unroll_factor == 8);
4754
4755    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
4756    for (j = 1; j <= jmax; j++) {
4757
4758       n_vars = bb1->tyenv->types_used;
4759
4760       bb2 = deepCopyIRSB(bb1);
4761       for (i = 0; i < n_vars; i++)
4762          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
4763
4764       for (i = 0; i < bb2->stmts_used; i++) {
4765          /* deltaIRStmt destructively modifies the stmt, but
4766             that's OK since bb2 is a complete fresh copy of bb1. */
4767          deltaIRStmt(bb2->stmts[i], n_vars);
4768          addStmtToIRSB(bb1, bb2->stmts[i]);
4769       }
4770    }
4771
4772    if (DEBUG_IROPT) {
4773       vex_printf("\nUNROLLED (%llx)\n", my_addr);
4774       ppIRSB(bb1);
4775       vex_printf("\n");
4776    }
4777
4778    /* Flattening; sigh.  The unroller succeeds in breaking flatness
4779       by negating the test condition.  This should be fixed properly.
4780       For the moment use this shotgun approach. */
4781    return flatten_BB(bb1);
4782 }
4783
4784
4785 /*---------------------------------------------------------------*/
4786 /*--- The tree builder                                        ---*/
4787 /*---------------------------------------------------------------*/
4788
4789 /* This isn't part of IR optimisation.  Really it's a pass done prior
4790    to instruction selection, which improves the code that the
4791    instruction selector can produce. */
4792
4793 /* --- The 'tmp' environment is the central data structure here --- */
4794
4795 /* The number of outstanding bindings we're prepared to track.
4796    The number of times the env becomes full and we have to dump
4797    the oldest binding (hence reducing code quality) falls very
4798    rapidly as the env size increases.  8 gives reasonable performance
4799    under most circumstances; 10 allows a little extra headroom. */
4800 #define A_NENV 10
4801
4802 /* An interval.  Used to record the bytes in the guest state accessed
4803    by a Put[I] statement or by (one or more) Get[I] expression(s).  In
4804    case of several Get[I] expressions, the lower/upper bounds are recorded.
4805 This is conservative but cheap. 4806 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107]. 4807 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at 4808 offset 200 would be recorded as [100,203] */ 4809 typedef 4810 struct { 4811 Bool present; 4812 Int low; 4813 Int high; 4814 } 4815 Interval; 4816 4817 static inline Bool 4818 intervals_overlap(Interval i1, Interval i2) 4819 { 4820 return (i1.low >= i2.low && i1.low <= i2.high) || 4821 (i2.low >= i1.low && i2.low <= i1.high); 4822 } 4823 4824 static inline void 4825 update_interval(Interval *i, Int low, Int high) 4826 { 4827 vassert(low <= high); 4828 4829 if (i->present) { 4830 if (low < i->low) i->low = low; 4831 if (high > i->high) i->high = high; 4832 } else { 4833 i->present = True; 4834 i->low = low; 4835 i->high = high; 4836 } 4837 } 4838 4839 4840 /* bindee == NULL === slot is not in use 4841 bindee != NULL === slot is in use 4842 */ 4843 typedef 4844 struct { 4845 IRTemp binder; 4846 IRExpr* bindee; 4847 Bool doesLoad; 4848 /* Record the bytes of the guest state BINDEE reads from. */ 4849 Interval getInterval; 4850 } 4851 ATmpInfo; 4852 4853 __attribute__((unused)) 4854 static void ppAEnv ( ATmpInfo* env ) 4855 { 4856 Int i; 4857 for (i = 0; i < A_NENV; i++) { 4858 vex_printf("%d tmp %d val ", i, (Int)env[i].binder); 4859 if (env[i].bindee) 4860 ppIRExpr(env[i].bindee); 4861 else 4862 vex_printf("(null)"); 4863 vex_printf("\n"); 4864 } 4865 } 4866 4867 /* --- Tree-traversal fns --- */ 4868 4869 /* Traverse an expr, and detect if any part of it reads memory or does 4870 a Get. Be careful ... this really controls how much the 4871 tree-builder can reorder the code, so getting it right is critical. 4872 */ 4873 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e ) 4874 { 4875 Int i; 4876 switch (e->tag) { 4877 case Iex_CCall: 4878 for (i = 0; e->Iex.CCall.args[i]; i++) 4879 setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]); 4880 return; 4881 case Iex_ITE: 4882 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond); 4883 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue); 4884 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse); 4885 return; 4886 case Iex_Qop: 4887 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1); 4888 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2); 4889 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3); 4890 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4); 4891 return; 4892 case Iex_Triop: 4893 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1); 4894 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2); 4895 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3); 4896 return; 4897 case Iex_Binop: 4898 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1); 4899 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2); 4900 return; 4901 case Iex_Unop: 4902 setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg); 4903 return; 4904 case Iex_Load: 4905 *doesLoad = True; 4906 setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr); 4907 return; 4908 case Iex_Get: { 4909 Int low = e->Iex.Get.offset; 4910 Int high = low + sizeofIRType(e->Iex.Get.ty) - 1; 4911 update_interval(getInterval, low, high); 4912 return; 4913 } 4914 case Iex_GetI: { 4915 IRRegArray *descr = e->Iex.GetI.descr; 4916 Int size = sizeofIRType(descr->elemTy); 4917 Int low = descr->base; 4918 Int high = low + descr->nElems * size - 1; 4919 update_interval(getInterval, low, high); 4920 
setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix); 4921 return; 4922 } 4923 case Iex_RdTmp: 4924 case Iex_Const: 4925 return; 4926 default: 4927 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4928 vpanic("setHints_Expr"); 4929 } 4930 } 4931 4932 4933 /* Add a binding to the front of the env and slide all the rest 4934 backwards. It should be the case that the last slot is free. */ 4935 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee ) 4936 { 4937 Int i; 4938 vassert(env[A_NENV-1].bindee == NULL); 4939 for (i = A_NENV-1; i >= 1; i--) 4940 env[i] = env[i-1]; 4941 env[0].binder = binder; 4942 env[0].bindee = bindee; 4943 env[0].doesLoad = False; /* filled in later */ 4944 env[0].getInterval.present = False; /* filled in later */ 4945 env[0].getInterval.low = -1; /* filled in later */ 4946 env[0].getInterval.high = -1; /* filled in later */ 4947 } 4948 4949 /* Given uses :: array of UShort, indexed by IRTemp 4950 Add the use-occurrences of temps in this expression 4951 to the env. 4952 */ 4953 static void aoccCount_Expr ( UShort* uses, IRExpr* e ) 4954 { 4955 Int i; 4956 4957 switch (e->tag) { 4958 4959 case Iex_RdTmp: /* the only interesting case */ 4960 uses[e->Iex.RdTmp.tmp]++; 4961 return; 4962 4963 case Iex_ITE: 4964 aoccCount_Expr(uses, e->Iex.ITE.cond); 4965 aoccCount_Expr(uses, e->Iex.ITE.iftrue); 4966 aoccCount_Expr(uses, e->Iex.ITE.iffalse); 4967 return; 4968 4969 case Iex_Qop: 4970 aoccCount_Expr(uses, e->Iex.Qop.details->arg1); 4971 aoccCount_Expr(uses, e->Iex.Qop.details->arg2); 4972 aoccCount_Expr(uses, e->Iex.Qop.details->arg3); 4973 aoccCount_Expr(uses, e->Iex.Qop.details->arg4); 4974 return; 4975 4976 case Iex_Triop: 4977 aoccCount_Expr(uses, e->Iex.Triop.details->arg1); 4978 aoccCount_Expr(uses, e->Iex.Triop.details->arg2); 4979 aoccCount_Expr(uses, e->Iex.Triop.details->arg3); 4980 return; 4981 4982 case Iex_Binop: 4983 aoccCount_Expr(uses, e->Iex.Binop.arg1); 4984 aoccCount_Expr(uses, e->Iex.Binop.arg2); 4985 return; 4986 4987 case Iex_Unop: 4988 aoccCount_Expr(uses, e->Iex.Unop.arg); 4989 return; 4990 4991 case Iex_Load: 4992 aoccCount_Expr(uses, e->Iex.Load.addr); 4993 return; 4994 4995 case Iex_CCall: 4996 for (i = 0; e->Iex.CCall.args[i]; i++) 4997 aoccCount_Expr(uses, e->Iex.CCall.args[i]); 4998 return; 4999 5000 case Iex_GetI: 5001 aoccCount_Expr(uses, e->Iex.GetI.ix); 5002 return; 5003 5004 case Iex_Const: 5005 case Iex_Get: 5006 return; 5007 5008 default: 5009 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5010 vpanic("aoccCount_Expr"); 5011 } 5012 } 5013 5014 5015 /* Given uses :: array of UShort, indexed by IRTemp 5016 Add the use-occurrences of temps in this statement 5017 to the env. 
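   (Strictly, the counts accumulate in the caller-supplied 'uses'
   array; the tree-builder env itself is only consulted later.)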
5018 */ 5019 static void aoccCount_Stmt ( UShort* uses, IRStmt* st ) 5020 { 5021 Int i; 5022 IRDirty* d; 5023 IRCAS* cas; 5024 switch (st->tag) { 5025 case Ist_AbiHint: 5026 aoccCount_Expr(uses, st->Ist.AbiHint.base); 5027 aoccCount_Expr(uses, st->Ist.AbiHint.nia); 5028 return; 5029 case Ist_WrTmp: 5030 aoccCount_Expr(uses, st->Ist.WrTmp.data); 5031 return; 5032 case Ist_Put: 5033 aoccCount_Expr(uses, st->Ist.Put.data); 5034 return; 5035 case Ist_PutI: 5036 aoccCount_Expr(uses, st->Ist.PutI.details->ix); 5037 aoccCount_Expr(uses, st->Ist.PutI.details->data); 5038 return; 5039 case Ist_Store: 5040 aoccCount_Expr(uses, st->Ist.Store.addr); 5041 aoccCount_Expr(uses, st->Ist.Store.data); 5042 return; 5043 case Ist_StoreG: { 5044 IRStoreG* sg = st->Ist.StoreG.details; 5045 aoccCount_Expr(uses, sg->addr); 5046 aoccCount_Expr(uses, sg->data); 5047 aoccCount_Expr(uses, sg->guard); 5048 return; 5049 } 5050 case Ist_LoadG: { 5051 IRLoadG* lg = st->Ist.LoadG.details; 5052 aoccCount_Expr(uses, lg->addr); 5053 aoccCount_Expr(uses, lg->alt); 5054 aoccCount_Expr(uses, lg->guard); 5055 return; 5056 } 5057 case Ist_CAS: 5058 cas = st->Ist.CAS.details; 5059 aoccCount_Expr(uses, cas->addr); 5060 if (cas->expdHi) 5061 aoccCount_Expr(uses, cas->expdHi); 5062 aoccCount_Expr(uses, cas->expdLo); 5063 if (cas->dataHi) 5064 aoccCount_Expr(uses, cas->dataHi); 5065 aoccCount_Expr(uses, cas->dataLo); 5066 return; 5067 case Ist_LLSC: 5068 aoccCount_Expr(uses, st->Ist.LLSC.addr); 5069 if (st->Ist.LLSC.storedata) 5070 aoccCount_Expr(uses, st->Ist.LLSC.storedata); 5071 return; 5072 case Ist_Dirty: 5073 d = st->Ist.Dirty.details; 5074 if (d->mFx != Ifx_None) 5075 aoccCount_Expr(uses, d->mAddr); 5076 aoccCount_Expr(uses, d->guard); 5077 for (i = 0; d->args[i]; i++) { 5078 IRExpr* arg = d->args[i]; 5079 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 5080 aoccCount_Expr(uses, arg); 5081 } 5082 return; 5083 case Ist_NoOp: 5084 case Ist_IMark: 5085 case Ist_MBE: 5086 return; 5087 case Ist_Exit: 5088 aoccCount_Expr(uses, st->Ist.Exit.guard); 5089 return; 5090 default: 5091 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 5092 vpanic("aoccCount_Stmt"); 5093 } 5094 } 5095 5096 /* Look up a binding for tmp in the env. If found, return the bound 5097 expression, and set the env's binding to NULL so it is marked as 5098 used. If not found, return NULL. */ 5099 5100 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp ) 5101 { 5102 Int i; 5103 for (i = 0; i < A_NENV; i++) { 5104 if (env[i].binder == tmp && env[i].bindee != NULL) { 5105 IRExpr* bindee = env[i].bindee; 5106 env[i].bindee = NULL; 5107 return bindee; 5108 } 5109 } 5110 return NULL; 5111 } 5112 5113 /* Traverse e, looking for temps. For each observed temp, see if env 5114 contains a binding for the temp, and if so return the bound value. 5115 The env has the property that any binding it holds is 5116 'single-shot', so once a binding is used, it is marked as no longer 5117 available, by setting its .bindee field to NULL. 
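   Holding only single-shot bindings is safe because a tmp is entered
   into the env only when it has exactly one use (see phase 2 of
   ado_treebuild_BB below).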
*/ 5118 5119 static inline Bool is_Unop ( IRExpr* e, IROp op ) { 5120 return e->tag == Iex_Unop && e->Iex.Unop.op == op; 5121 } 5122 static inline Bool is_Binop ( IRExpr* e, IROp op ) { 5123 return e->tag == Iex_Binop && e->Iex.Binop.op == op; 5124 } 5125 5126 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 5127 { 5128 switch (op) { 5129 case Iop_Or32: 5130 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ 5131 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) 5132 return IRExpr_Unop( Iop_CmpwNEZ32, 5133 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 5134 a2->Iex.Unop.arg ) ); 5135 break; 5136 5137 case Iop_CmpNE32: 5138 /* Since X has type Ity_I1 we can simplify: 5139 CmpNE32(1Uto32(X),0)) ==> X */ 5140 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2)) 5141 return a1->Iex.Unop.arg; 5142 break; 5143 5144 default: 5145 break; 5146 } 5147 /* no reduction rule applies */ 5148 return IRExpr_Binop( op, a1, a2 ); 5149 } 5150 5151 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) 5152 { 5153 switch (op) { 5154 case Iop_CmpwNEZ64: 5155 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */ 5156 if (is_Unop(aa, Iop_CmpwNEZ64)) 5157 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg ); 5158 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5159 if (is_Binop(aa, Iop_Or64) 5160 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) 5161 return fold_IRExpr_Unop( 5162 Iop_CmpwNEZ64, 5163 IRExpr_Binop(Iop_Or64, 5164 aa->Iex.Binop.arg1->Iex.Unop.arg, 5165 aa->Iex.Binop.arg2)); 5166 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5167 if (is_Binop(aa, Iop_Or64) 5168 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) 5169 return fold_IRExpr_Unop( 5170 Iop_CmpwNEZ64, 5171 IRExpr_Binop(Iop_Or64, 5172 aa->Iex.Binop.arg1, 5173 aa->Iex.Binop.arg2->Iex.Unop.arg)); 5174 break; 5175 case Iop_CmpNEZ64: 5176 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ 5177 if (is_Unop(aa, Iop_Left64)) 5178 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); 5179 /* CmpNEZ64( 1Uto64(X) ) --> X */ 5180 if (is_Unop(aa, Iop_1Uto64)) 5181 return aa->Iex.Unop.arg; 5182 break; 5183 case Iop_CmpwNEZ32: 5184 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ 5185 if (is_Unop(aa, Iop_CmpwNEZ32)) 5186 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); 5187 break; 5188 case Iop_CmpNEZ32: 5189 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ 5190 if (is_Unop(aa, Iop_Left32)) 5191 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); 5192 /* CmpNEZ32( 1Uto32(X) ) --> X */ 5193 if (is_Unop(aa, Iop_1Uto32)) 5194 return aa->Iex.Unop.arg; 5195 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */ 5196 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64)) 5197 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg); 5198 break; 5199 case Iop_CmpNEZ8: 5200 /* CmpNEZ8( 1Uto8(X) ) --> X */ 5201 if (is_Unop(aa, Iop_1Uto8)) 5202 return aa->Iex.Unop.arg; 5203 break; 5204 case Iop_Left32: 5205 /* Left32( Left32(x) ) --> Left32(x) */ 5206 if (is_Unop(aa, Iop_Left32)) 5207 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); 5208 break; 5209 case Iop_Left64: 5210 /* Left64( Left64(x) ) --> Left64(x) */ 5211 if (is_Unop(aa, Iop_Left64)) 5212 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg ); 5213 break; 5214 case Iop_32to1: 5215 /* 32to1( 1Uto32 ( x ) ) --> x */ 5216 if (is_Unop(aa, Iop_1Uto32)) 5217 return aa->Iex.Unop.arg; 5218 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ 5219 if (is_Unop(aa, Iop_CmpwNEZ32)) 5220 return IRExpr_Unop( 
Iop_CmpNEZ32, aa->Iex.Unop.arg ); 5221 break; 5222 case Iop_64to1: 5223 /* 64to1( 1Uto64 ( x ) ) --> x */ 5224 if (is_Unop(aa, Iop_1Uto64)) 5225 return aa->Iex.Unop.arg; 5226 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ 5227 if (is_Unop(aa, Iop_CmpwNEZ64)) 5228 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); 5229 break; 5230 case Iop_64to32: 5231 /* 64to32( 32Uto64 ( x )) --> x */ 5232 if (is_Unop(aa, Iop_32Uto64)) 5233 return aa->Iex.Unop.arg; 5234 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */ 5235 if (is_Unop(aa, Iop_8Uto64)) 5236 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg); 5237 break; 5238 5239 case Iop_32Uto64: 5240 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */ 5241 if (is_Unop(aa, Iop_8Uto32)) 5242 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg); 5243 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */ 5244 if (is_Unop(aa, Iop_16Uto32)) 5245 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg); 5246 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )) 5247 --> Shr64( 32Uto64(64to32(x)), sh )) */ 5248 if (is_Unop(aa, Iop_64to32) 5249 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64) 5250 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5251 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5252 Iop_64to32)) { 5253 return aa->Iex.Unop.arg; 5254 } 5255 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )) 5256 --> 32Uto64(64to32( Shl64( x, sh )) */ 5257 if (is_Unop(aa, Iop_64to32) 5258 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64) 5259 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5260 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5261 Iop_64to32)) { 5262 return 5263 IRExpr_Unop( 5264 Iop_32Uto64, 5265 IRExpr_Unop( 5266 Iop_64to32, 5267 IRExpr_Binop( 5268 Iop_Shl64, 5269 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg, 5270 aa->Iex.Unop.arg->Iex.Binop.arg2 5271 ))); 5272 } 5273 break; 5274 5275 case Iop_1Sto32: 5276 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */ 5277 if (is_Unop(aa, Iop_CmpNEZ8) 5278 && is_Unop(aa->Iex.Unop.arg, Iop_32to8) 5279 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32) 5280 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg, 5281 Iop_CmpNEZ32)) { 5282 return IRExpr_Unop( Iop_CmpwNEZ32, 5283 aa->Iex.Unop.arg->Iex.Unop.arg 5284 ->Iex.Unop.arg->Iex.Unop.arg); 5285 } 5286 break; 5287 5288 default: 5289 break; 5290 } 5291 /* no reduction rule applies */ 5292 return IRExpr_Unop( op, aa ); 5293 } 5294 5295 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) 5296 { 5297 IRExpr* e2; 5298 IRExpr** args2; 5299 Int i; 5300 5301 switch (e->tag) { 5302 5303 case Iex_CCall: 5304 args2 = shallowCopyIRExprVec(e->Iex.CCall.args); 5305 for (i = 0; args2[i]; i++) 5306 args2[i] = atbSubst_Expr(env,args2[i]); 5307 return IRExpr_CCall( 5308 e->Iex.CCall.cee, 5309 e->Iex.CCall.retty, 5310 args2 5311 ); 5312 case Iex_RdTmp: 5313 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp); 5314 return e2 ? 
e2 : e; 5315 case Iex_ITE: 5316 return IRExpr_ITE( 5317 atbSubst_Expr(env, e->Iex.ITE.cond), 5318 atbSubst_Expr(env, e->Iex.ITE.iftrue), 5319 atbSubst_Expr(env, e->Iex.ITE.iffalse) 5320 ); 5321 case Iex_Qop: 5322 return IRExpr_Qop( 5323 e->Iex.Qop.details->op, 5324 atbSubst_Expr(env, e->Iex.Qop.details->arg1), 5325 atbSubst_Expr(env, e->Iex.Qop.details->arg2), 5326 atbSubst_Expr(env, e->Iex.Qop.details->arg3), 5327 atbSubst_Expr(env, e->Iex.Qop.details->arg4) 5328 ); 5329 case Iex_Triop: 5330 return IRExpr_Triop( 5331 e->Iex.Triop.details->op, 5332 atbSubst_Expr(env, e->Iex.Triop.details->arg1), 5333 atbSubst_Expr(env, e->Iex.Triop.details->arg2), 5334 atbSubst_Expr(env, e->Iex.Triop.details->arg3) 5335 ); 5336 case Iex_Binop: 5337 return fold_IRExpr_Binop( 5338 e->Iex.Binop.op, 5339 atbSubst_Expr(env, e->Iex.Binop.arg1), 5340 atbSubst_Expr(env, e->Iex.Binop.arg2) 5341 ); 5342 case Iex_Unop: 5343 return fold_IRExpr_Unop( 5344 e->Iex.Unop.op, 5345 atbSubst_Expr(env, e->Iex.Unop.arg) 5346 ); 5347 case Iex_Load: 5348 return IRExpr_Load( 5349 e->Iex.Load.end, 5350 e->Iex.Load.ty, 5351 atbSubst_Expr(env, e->Iex.Load.addr) 5352 ); 5353 case Iex_GetI: 5354 return IRExpr_GetI( 5355 e->Iex.GetI.descr, 5356 atbSubst_Expr(env, e->Iex.GetI.ix), 5357 e->Iex.GetI.bias 5358 ); 5359 case Iex_Const: 5360 case Iex_Get: 5361 return e; 5362 default: 5363 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5364 vpanic("atbSubst_Expr"); 5365 } 5366 } 5367 5368 /* Same deal as atbSubst_Expr, except for stmts. */ 5369 5370 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) 5371 { 5372 Int i; 5373 IRDirty *d, *d2; 5374 IRCAS *cas, *cas2; 5375 IRPutI *puti, *puti2; 5376 5377 switch (st->tag) { 5378 case Ist_AbiHint: 5379 return IRStmt_AbiHint( 5380 atbSubst_Expr(env, st->Ist.AbiHint.base), 5381 st->Ist.AbiHint.len, 5382 atbSubst_Expr(env, st->Ist.AbiHint.nia) 5383 ); 5384 case Ist_Store: 5385 return IRStmt_Store( 5386 st->Ist.Store.end, 5387 atbSubst_Expr(env, st->Ist.Store.addr), 5388 atbSubst_Expr(env, st->Ist.Store.data) 5389 ); 5390 case Ist_StoreG: { 5391 IRStoreG* sg = st->Ist.StoreG.details; 5392 return IRStmt_StoreG(sg->end, 5393 atbSubst_Expr(env, sg->addr), 5394 atbSubst_Expr(env, sg->data), 5395 atbSubst_Expr(env, sg->guard)); 5396 } 5397 case Ist_LoadG: { 5398 IRLoadG* lg = st->Ist.LoadG.details; 5399 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, 5400 atbSubst_Expr(env, lg->addr), 5401 atbSubst_Expr(env, lg->alt), 5402 atbSubst_Expr(env, lg->guard)); 5403 } 5404 case Ist_WrTmp: 5405 return IRStmt_WrTmp( 5406 st->Ist.WrTmp.tmp, 5407 atbSubst_Expr(env, st->Ist.WrTmp.data) 5408 ); 5409 case Ist_Put: 5410 return IRStmt_Put( 5411 st->Ist.Put.offset, 5412 atbSubst_Expr(env, st->Ist.Put.data) 5413 ); 5414 case Ist_PutI: 5415 puti = st->Ist.PutI.details; 5416 puti2 = mkIRPutI(puti->descr, 5417 atbSubst_Expr(env, puti->ix), 5418 puti->bias, 5419 atbSubst_Expr(env, puti->data)); 5420 return IRStmt_PutI(puti2); 5421 5422 case Ist_Exit: 5423 return IRStmt_Exit( 5424 atbSubst_Expr(env, st->Ist.Exit.guard), 5425 st->Ist.Exit.jk, 5426 st->Ist.Exit.dst, 5427 st->Ist.Exit.offsIP 5428 ); 5429 case Ist_IMark: 5430 return IRStmt_IMark(st->Ist.IMark.addr, 5431 st->Ist.IMark.len, 5432 st->Ist.IMark.delta); 5433 case Ist_NoOp: 5434 return IRStmt_NoOp(); 5435 case Ist_MBE: 5436 return IRStmt_MBE(st->Ist.MBE.event); 5437 case Ist_CAS: 5438 cas = st->Ist.CAS.details; 5439 cas2 = mkIRCAS( 5440 cas->oldHi, cas->oldLo, cas->end, 5441 atbSubst_Expr(env, cas->addr), 5442 cas->expdHi ? 
atbSubst_Expr(env, cas->expdHi) : NULL, 5443 atbSubst_Expr(env, cas->expdLo), 5444 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL, 5445 atbSubst_Expr(env, cas->dataLo) 5446 ); 5447 return IRStmt_CAS(cas2); 5448 case Ist_LLSC: 5449 return IRStmt_LLSC( 5450 st->Ist.LLSC.end, 5451 st->Ist.LLSC.result, 5452 atbSubst_Expr(env, st->Ist.LLSC.addr), 5453 st->Ist.LLSC.storedata 5454 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL 5455 ); 5456 case Ist_Dirty: 5457 d = st->Ist.Dirty.details; 5458 d2 = emptyIRDirty(); 5459 *d2 = *d; 5460 if (d2->mFx != Ifx_None) 5461 d2->mAddr = atbSubst_Expr(env, d2->mAddr); 5462 d2->guard = atbSubst_Expr(env, d2->guard); 5463 for (i = 0; d2->args[i]; i++) { 5464 IRExpr* arg = d2->args[i]; 5465 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 5466 d2->args[i] = atbSubst_Expr(env, arg); 5467 } 5468 return IRStmt_Dirty(d2); 5469 default: 5470 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 5471 vpanic("atbSubst_Stmt"); 5472 } 5473 } 5474 5475 inline 5476 static Bool dirty_helper_stores ( const IRDirty *d ) 5477 { 5478 return d->mFx == Ifx_Write || d->mFx == Ifx_Modify; 5479 } 5480 5481 inline 5482 static Interval dirty_helper_puts ( const IRDirty *d, 5483 Bool (*preciseMemExnsFn)(Int, Int), 5484 Bool *requiresPreciseMemExns ) 5485 { 5486 Int i; 5487 Interval interval; 5488 5489 /* Passing the guest state pointer opens the door to modifying the 5490 guest state under the covers. It's not allowed, but let's be 5491 extra conservative and assume the worst. */ 5492 for (i = 0; d->args[i]; i++) { 5493 if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) { 5494 *requiresPreciseMemExns = True; 5495 /* Assume all guest state is written. */ 5496 interval.present = True; 5497 interval.low = 0; 5498 interval.high = 0x7FFFFFFF; 5499 return interval; 5500 } 5501 } 5502 5503 /* Check the side effects on the guest state */ 5504 interval.present = False; 5505 interval.low = interval.high = -1; 5506 *requiresPreciseMemExns = False; 5507 5508 for (i = 0; i < d->nFxState; ++i) { 5509 if (d->fxState[i].fx != Ifx_Read) { 5510 Int offset = d->fxState[i].offset; 5511 Int size = d->fxState[i].size; 5512 Int nRepeats = d->fxState[i].nRepeats; 5513 Int repeatLen = d->fxState[i].repeatLen; 5514 5515 if (preciseMemExnsFn(offset, 5516 offset + nRepeats * repeatLen + size - 1)) { 5517 *requiresPreciseMemExns = True; 5518 } 5519 update_interval(&interval, offset, 5520 offset + nRepeats * repeatLen + size - 1); 5521 } 5522 } 5523 5524 return interval; 5525 } 5526 5527 /* Return an interval if st modifies the guest state. Via requiresPreciseMemExns 5528 return whether or not that modification requires precise exceptions. */ 5529 static Interval stmt_modifies_guest_state ( IRSB *bb, const IRStmt *st, 5530 Bool (*preciseMemExnsFn)(Int,Int), 5531 Bool *requiresPreciseMemExns ) 5532 { 5533 Interval interval; 5534 5535 switch (st->tag) { 5536 case Ist_Put: { 5537 Int offset = st->Ist.Put.offset; 5538 Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data)); 5539 5540 *requiresPreciseMemExns = preciseMemExnsFn(offset, offset + size - 1); 5541 interval.present = True; 5542 interval.low = offset; 5543 interval.high = offset + size - 1; 5544 return interval; 5545 } 5546 5547 case Ist_PutI: { 5548 IRRegArray *descr = st->Ist.PutI.details->descr; 5549 Int offset = descr->base; 5550 Int size = sizeofIRType(descr->elemTy); 5551 5552 /* We quietly assume here that all segments are contiguous and there 5553 are no holes. This is to avoid a loop. 
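      (A PutI may write any element of the array, so the whole range
      [base, base + nElems * size - 1] is treated as written.)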
The assumption is conservative 5554 in the sense that we might report that precise memory exceptions are 5555 needed when in fact they are not. */ 5556 *requiresPreciseMemExns = 5557 preciseMemExnsFn(offset, offset + descr->nElems * size - 1); 5558 interval.present = True; 5559 interval.low = offset; 5560 interval.high = offset + descr->nElems * size - 1; 5561 return interval; 5562 } 5563 5564 case Ist_Dirty: 5565 return dirty_helper_puts(st->Ist.Dirty.details, preciseMemExnsFn, 5566 requiresPreciseMemExns); 5567 5568 default: 5569 *requiresPreciseMemExns = False; 5570 interval.present = False; 5571 interval.low = -1; 5572 interval.high = -1; 5573 return interval; 5574 } 5575 } 5576 5577 /* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb, 5578 Bool (*preciseMemExnsFn)(Int,Int) ) 5579 { 5580 Int i, j, k, m; 5581 Bool stmtStores, invalidateMe; 5582 Interval putInterval; 5583 IRStmt* st; 5584 IRStmt* st2; 5585 ATmpInfo env[A_NENV]; 5586 5587 Bool max_ga_known = False; 5588 Addr64 max_ga = 0; 5589 5590 Int n_tmps = bb->tyenv->types_used; 5591 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort)); 5592 5593 /* Phase 1. Scan forwards in bb, counting use occurrences of each 5594 temp. Also count occurrences in the bb->next field. Take the 5595 opportunity to also find the maximum guest address in the block, 5596 since that will be needed later for deciding when we can safely 5597 elide event checks. */ 5598 5599 for (i = 0; i < n_tmps; i++) 5600 uses[i] = 0; 5601 5602 for (i = 0; i < bb->stmts_used; i++) { 5603 st = bb->stmts[i]; 5604 switch (st->tag) { 5605 case Ist_NoOp: 5606 continue; 5607 case Ist_IMark: { 5608 Int len = st->Ist.IMark.len; 5609 Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1; 5610 max_ga_known = True; 5611 if (mga > max_ga) 5612 max_ga = mga; 5613 break; 5614 } 5615 default: 5616 break; 5617 } 5618 aoccCount_Stmt( uses, st ); 5619 } 5620 aoccCount_Expr(uses, bb->next ); 5621 5622 # if 0 5623 for (i = 0; i < n_tmps; i++) { 5624 if (uses[i] == 0) 5625 continue; 5626 ppIRTemp( (IRTemp)i ); 5627 vex_printf(" used %d\n", (Int)uses[i] ); 5628 } 5629 # endif 5630 5631 /* Phase 2. Scan forwards in bb. For each statement in turn: 5632 5633 If the env is full, emit the end element. This guarantees 5634 there is at least one free slot in the following. 5635 5636 On seeing 't = E', occ(t)==1, 5637 let E'=env(E) 5638 delete this stmt 5639 add t -> E' to the front of the env 5640 Examine E' and set the hints for E' appropriately 5641 (doesLoad? doesGet?) 5642 5643 On seeing any other stmt, 5644 let stmt' = env(stmt) 5645 remove from env any 't=E' binds invalidated by stmt 5646 emit the invalidated stmts 5647 emit stmt' 5648 compact any holes in env 5649 by sliding entries towards the front 5650 5651 Finally, apply env to bb->next. 5652 */ 5653 5654 for (i = 0; i < A_NENV; i++) { 5655 env[i].bindee = NULL; 5656 env[i].binder = IRTemp_INVALID; 5657 } 5658 5659 /* The stmts in bb are being reordered, and we are guaranteed to 5660 end up with no more than the number we started with. Use i to 5661 be the cursor of the current stmt examined and j <= i to be that 5662 for the current stmt being written. 5663 */ 5664 j = 0; 5665 for (i = 0; i < bb->stmts_used; i++) { 5666 5667 st = bb->stmts[i]; 5668 if (st->tag == Ist_NoOp) 5669 continue; 5670 5671 /* Ensure there's at least one space in the env, by emitting 5672 the oldest binding if necessary. 
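         'Emitting' here means writing the binding back into the stmt
         array as an ordinary 't = E' assignment at the output cursor j.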
*/
5673       if (env[A_NENV-1].bindee != NULL) {
5674          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
5675                                       env[A_NENV-1].bindee );
5676          j++;
5677          vassert(j <= i);
5678          env[A_NENV-1].bindee = NULL;
5679       }
5680
5681       /* Consider current stmt. */
5682       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
5683          IRExpr *e, *e2;
5684
5685          /* optional extra: dump dead bindings as we find them.
5686             Removes the need for a prior dead-code removal pass. */
5687          if (uses[st->Ist.WrTmp.tmp] == 0) {
5688             if (0) vex_printf("DEAD binding\n");
5689             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5690          }
5691          vassert(uses[st->Ist.WrTmp.tmp] == 1);
5692
5693          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
5694             actions. */
5695          e  = st->Ist.WrTmp.data;
5696          e2 = atbSubst_Expr(env, e);
5697          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
5698          setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
5699          /* don't advance j, as we are deleting this stmt and instead
5700             holding it temporarily in the env. */
5701          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5702       }
5703
5704       /* we get here for any other kind of statement. */
5705       /* 'use up' any bindings required by the current statement. */
5706       st2 = atbSubst_Stmt(env, st);
5707
5708       /* Now, before this stmt, dump any bindings in env that it
5709          invalidates.  These need to be dumped in the order in which
5710          they originally entered env -- that means from oldest to
5711          youngest. */
5712
5713       /* putInterval/stmtStores characterise what the stmt under
5714          consideration does, or might do (the safe side being True). */
5715
5716       Bool putRequiresPreciseMemExns;
5717       putInterval = stmt_modifies_guest_state( bb, st, preciseMemExnsFn,
5718                                                &putRequiresPreciseMemExns);
5719
5720       /* stmtStores is True if this stmt writes memory, or might do so
5721          (==> we don't want to reorder other loads or stores relative
5722          to it).  Also, both LL and SC fall under this classification,
5723          since we really ought to be conservative and not reorder any
5724          other memory transactions relative to them. */
5725       stmtStores
5726          = toBool( st->tag == Ist_Store
5727                    || (st->tag == Ist_Dirty
5728                        && dirty_helper_stores(st->Ist.Dirty.details))
5729                    || st->tag == Ist_LLSC
5730                    || st->tag == Ist_CAS );
5731
5732       for (k = A_NENV-1; k >= 0; k--) {
5733          if (env[k].bindee == NULL)
5734             continue;
5735          /* Compare the actions of this stmt with the actions of
5736             binding 'k', to see if they invalidate the binding. */
5737          invalidateMe
5738             = toBool(
5739                  /* a store invalidates loaded data */
5740                  (env[k].doesLoad && stmtStores)
5741                  /* a put invalidates get'd data, if they overlap */
5742                  || ((env[k].getInterval.present && putInterval.present) &&
5743                      intervals_overlap(env[k].getInterval, putInterval))
5744                  /* a put invalidates loaded data.  That means, in essence,
5745                     that a load expression cannot be substituted into a
5746                     statement that follows the put.  There is nothing wrong
5747                     with doing so, except when the put statement requires
5748                     precise exceptions.  Think of a load that is moved past
5749                     a put where the put updates the IP in the guest state.
5750                     If the load generates a segfault, the wrong address
5751                     (line number) would be reported. */
5752                  || (env[k].doesLoad && putInterval.present &&
5753                      putRequiresPreciseMemExns)
5754                  /* probably overly conservative: a memory bus event
5755                     invalidates absolutely everything, so that all
5756                     computation prior to it is forced to complete before
5757                     proceeding with the event (fence,lock,unlock).
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data.  That means, in essence,
                 that a load expression cannot be substituted into a
                 statement that follows the put.  There is nothing wrong
                 with doing so, except when the put statement requires
                 precise exceptions.  Think of a load that is moved past
                 a put where the put updates the IP in the guest state.
                 If the load generates a segfault, the wrong address
                 (line number) would be reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front. */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      for (; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* Finally, emit the substituted statement. */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left-over bindings?  Or are any left-over bindings by definition
      dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr64)0;
}


/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/

static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int)
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd\n\n" );
      ppIRSB(bb);
   }

   return bb;
}
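
/* Illustrative example of the cheap pipeline (schematic IR syntax).
   Given

      PUT(60) = t1
      t2 = GET:I32(60)
      t3 = Add32(t2,0x4:I32)

   redundant-Get removal rewrites the Get as  t2 = t1, constant
   propagation then replaces the use, giving  t3 = Add32(t1,0x4:I32),
   and dead code removal deletes the now-unused  t2 = t1. */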

/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI. */

static
IRSB* expensive_transformations( IRSB* bb )
{
   (void)do_cse_BB( bb );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb );
   }
   do_deadcode_BB( bb );
   return bb;
}


/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
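
/* For example (illustrative only; 'newArg' stands for whatever
   replacement atom is intended): to change the first argument of an
   Iex_Binop node e, do not write

      e->Iex.Binop.arg1 = newArg;        (WRONG: e may be shared)

   but instead build a fresh node:

      IRExpr* e2 = IRExpr_Binop(e->Iex.Binop.op, newArg,
                                e->Iex.Binop.arg2);
*/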

IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn );
      }
      do_cse_BB( bb );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If there is any evidence of FP or Vector activity, CSE, as
            that tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI, since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb );
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                            ir_opt.c ---*/
/*---------------------------------------------------------------*/