1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*---------------------------------------------------------------*/ 4 /*--- begin ir_opt.c ---*/ 5 /*---------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2004-2015 OpenWorks LLP 12 info (at) open-works.net 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 27 02110-1301, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35 */ 36 37 #include "libvex_basictypes.h" 38 #include "libvex_ir.h" 39 #include "libvex.h" 40 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "ir_opt.h" 44 45 46 /* Set to 1 for lots of debugging output. */ 47 #define DEBUG_IROPT 0 48 49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */ 50 #define STATS_IROPT 0 51 52 53 /* What iropt does, 29 Dec 04. 
54 55 It takes an IRSB and produces a new one with the same meaning, 56 defined thus: 57 58 After execution of the new BB, all guest state and guest memory is 59 the same as after execution of the original. This is true 60 regardless of how the block was exited (at the end vs side exit). 61 62 In addition, parts of the guest state will be identical to that 63 created by execution of the original at the following observation 64 points: 65 66 * In a dirty helper call, any parts of the guest state that the 67 helper states that it reads or modifies will be up to date. 68 Also, guest memory will be up to date. Parts of the guest state 69 not marked as being read or modified by the helper cannot be 70 assumed to be up-to-date at the point where the helper is called. 71 72 * If iropt_register_updates == VexRegUpdSpAtMemAccess : 73 The guest state is only up to date only as explained above 74 (i.e. at SB exits and as specified by dirty helper call). 75 Also, the stack pointer register is up to date at memory 76 exception points (as this is needed for the stack extension 77 logic in m_signals.c). 78 79 * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess : 80 Immediately prior to any load or store, those parts of the guest 81 state marked as requiring precise exceptions will be up to date. 82 Also, guest memory will be up to date. Parts of the guest state 83 not marked as requiring precise exceptions cannot be assumed to 84 be up-to-date at the point of the load/store. 85 86 * If iropt_register_updates == VexRegUpdAllregsAtMemAccess: 87 Same as minimal, but all the guest state is up to date at memory 88 exception points. 89 90 * If iropt_register_updates == VexRegUpdAllregsAtEachInsn : 91 Guest state is up to date at each instruction. 92 93 The relative order of loads and stores (including loads/stores of 94 guest memory done by dirty helpers annotated as such) is not 95 changed. 
However, the relative order of loads with no intervening 96 stores/modifies may be changed. 97 98 Transformation order 99 ~~~~~~~~~~~~~~~~~~~~ 100 101 There are three levels of optimisation, controlled by 102 vex_control.iropt_level. Define first: 103 104 "Cheap transformations" are the following sequence: 105 * Redundant-Get removal 106 * Redundant-Put removal 107 * Constant propagation/folding 108 * Dead code removal 109 * Specialisation of clean helper functions 110 * Dead code removal 111 112 "Expensive transformations" are the following sequence: 113 * CSE 114 * Folding of add/sub chains 115 * Redundant-GetI removal 116 * Redundant-PutI removal 117 * Dead code removal 118 119 Then the transformations are as follows, as defined by 120 vex_control.iropt_level: 121 122 Level 0: 123 * Flatten into atomic form. 124 125 Level 1: the following sequence: 126 * Flatten into atomic form. 127 * Cheap transformations. 128 129 Level 2: the following sequence 130 * Flatten into atomic form. 131 * Cheap transformations. 132 * If block contains any floating or vector types, CSE. 133 * If block contains GetI or PutI, Expensive transformations. 134 * Try unrolling loops. Three possible outcomes: 135 - No effect: do nothing more. 136 - Unrolled a loop, and block does not contain GetI or PutI: 137 Do: * CSE 138 * Dead code removal 139 - Unrolled a loop, and block contains GetI or PutI: 140 Do: * Expensive transformations 141 * Cheap transformations 142 */ 143 144 /* Implementation notes, 29 Dec 04. 145 146 TODO (important): I think rPutI removal ignores precise exceptions 147 and is therefore in a sense, wrong. In the sense that PutIs are 148 assumed not to write parts of the guest state that we need to have 149 up-to-date at loads/stores. So far on x86 guest that has not 150 mattered since indeed only the x87 FP registers and tags are 151 accessed using GetI/PutI, and there is no need so far for them to 152 be up to date at mem exception points. 
The rPutI pass should be 153 fixed. 154 155 TODO: improve pessimistic handling of precise exceptions 156 in the tree builder. 157 158 TODO: check interaction of rGetI and dirty helpers. 159 160 F64i constants are treated differently from other constants. 161 They are not regarded as atoms, and instead lifted off and 162 bound to temps. This allows them to participate in CSE, which 163 is important for getting good performance for x86 guest code. 164 165 CSE up F64 literals (already doing F64is) 166 167 CSE: consider carefully the requirement for precise exns 168 prior to making CSE any more aggressive. */ 169 170 171 /*---------------------------------------------------------------*/ 172 /*--- Finite mappery, of a sort ---*/ 173 /*---------------------------------------------------------------*/ 174 175 /* General map from HWord-sized thing HWord-sized thing. Could be by 176 hashing, but it's not clear whether or not this would really be any 177 faster. */ 178 179 typedef 180 struct { 181 Bool* inuse; 182 HWord* key; 183 HWord* val; 184 Int size; 185 Int used; 186 } 187 HashHW; 188 189 static HashHW* newHHW ( void ) 190 { 191 HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW)); 192 h->size = 8; 193 h->used = 0; 194 h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool)); 195 h->key = LibVEX_Alloc_inline(h->size * sizeof(HWord)); 196 h->val = LibVEX_Alloc_inline(h->size * sizeof(HWord)); 197 return h; 198 } 199 200 201 /* Look up key in the map. */ 202 203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key ) 204 { 205 Int i; 206 /* vex_printf("lookupHHW(%llx)\n", key ); */ 207 for (i = 0; i < h->used; i++) { 208 if (h->inuse[i] && h->key[i] == key) { 209 if (val) 210 *val = h->val[i]; 211 return True; 212 } 213 } 214 return False; 215 } 216 217 218 /* Add key->val to the map. Replaces any existing binding for key. 
*/ 219 220 static void addToHHW ( HashHW* h, HWord key, HWord val ) 221 { 222 Int i, j; 223 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */ 224 225 /* Find and replace existing binding, if any. */ 226 for (i = 0; i < h->used; i++) { 227 if (h->inuse[i] && h->key[i] == key) { 228 h->val[i] = val; 229 return; 230 } 231 } 232 233 /* Ensure a space is available. */ 234 if (h->used == h->size) { 235 /* Copy into arrays twice the size. */ 236 Bool* inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool)); 237 HWord* key2 = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord)); 238 HWord* val2 = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord)); 239 for (i = j = 0; i < h->size; i++) { 240 if (!h->inuse[i]) continue; 241 inuse2[j] = True; 242 key2[j] = h->key[i]; 243 val2[j] = h->val[i]; 244 j++; 245 } 246 h->used = j; 247 h->size *= 2; 248 h->inuse = inuse2; 249 h->key = key2; 250 h->val = val2; 251 } 252 253 /* Finally, add it. */ 254 vassert(h->used < h->size); 255 h->inuse[h->used] = True; 256 h->key[h->used] = key; 257 h->val[h->used] = val; 258 h->used++; 259 } 260 261 262 /*---------------------------------------------------------------*/ 263 /*--- Flattening out a BB into atomic SSA form ---*/ 264 /*---------------------------------------------------------------*/ 265 266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp 267 copies made by flattening. If in doubt return False. */ 268 269 static Bool isFlat ( IRExpr* e ) 270 { 271 if (e->tag == Iex_Get) 272 return True; 273 if (e->tag == Iex_Binop) 274 return toBool( isIRAtom(e->Iex.Binop.arg1) 275 && isIRAtom(e->Iex.Binop.arg2) ); 276 if (e->tag == Iex_Load) 277 return isIRAtom(e->Iex.Load.addr); 278 return False; 279 } 280 281 /* Flatten out 'ex' so it is atomic, returning a new expression with 282 the same value, after having appended extra IRTemp assignments to 283 the end of 'bb'. 
*/

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Flatten the index expression; bind the GetI itself to a
            fresh temp so the returned expression is an atom. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* A Get has no subexpressions; just bind it to a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         /* NOTE(review): the recursive flatten_Expr calls below are
            function arguments, so the order in which their WrTmp
            statements are appended to 'bb' is compiler-dependent (C
            argument evaluation order is unspecified).  The subtrees
            are independent, so any order is semantically fine, but
            the resulting statement order is not deterministic across
            compilers. */
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         /* Flatten the address; the load itself gets its own temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Shallow-copy the argument vector first, so the original
            expression is not mutated in place. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom; nothing to do. */
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'.
*/

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr *e1, *e2, *e3, *e4, *e5;
   IRDirty *d, *d2;
   IRCAS *cas, *cas2;
   IRPutI *puti, *puti2;
   IRLoadG *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data, then rebuild. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS; only flatten
            them when present. */
         cas = st->Ist.CAS.details;
         e1 = flatten_Expr(bb, cas->addr);
         e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3 = flatten_Expr(bb, cas->expdLo);
         e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5 = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata == NULL denotes a Load-Linked; non-NULL denotes
            a Store-Conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Work on a copy of the dirty details, flattening mAddr (if
            the helper touches memory), the guard and all ordinary
            args in place.  VECRET/BBPTR marker args are left as-is. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


/* Produce a flattened copy of 'in': same type environment, statements
   and final exit, but with every statement in atomic (flat SSA)
   form.  'in' itself is not modified. */

static IRSB* flatten_BB ( IRSB* in )
{
   Int i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP = in->offsIP;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   /* Both offsets must fit in 16 bits, since they are packed into a
      single 32-bit key below. */
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Same key encoding, but for a GetI/PutI guest state array. */

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      /* Unpack the (min << 16) | max key encoding. */
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}


/* Scan 'bb' forwards, replacing Gets whose value is already known
   (because of an earlier Put or Get to the same offset range) by the
   bound temp or constant.  Modifies bb->stmts in place. */

static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt key = 0; /* keep gcc -O happy */
   Int i, j;
   HWord val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE = (IRExpr*)val;
            Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                   == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf(" -> "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d = st->Ist.Dirty.details;
         Bool writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      /* NOTE: relies on 'key' still holding the value computed for
         this Put in the invalidation step just above. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int j;
   UInt key = 0; /* keep gcc -O happy */
   Bool isGet;
   Bool memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int i, j;
   Bool isPut;
   IRStmt* st;
   UInt key = 0; /* keep gcc -O happy */

   /* This pass is invalid under AllregsAtEachInsn; the caller must
      not ask for it in that mode. */
   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Only plain Puts are candidates for removal here; PutIs fall
         through to handle_gets_Stmt below. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value? The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory.
   Thusly, two
   Get's, GetI's or Load's, even when accessing the same location, will be
   assumed to compute different values. After all the accesses may happen
   at different times and the guest state / memory can have changed in
   the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

/* Fast-path wrapper: expressions with different tags can never be
   structurally identical, so only fall through to the out-of-line
   general case when the tags match. */
inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

/* Slow path: structural comparison of two same-tagged expressions,
   recursing through IRTemp bindings in |env| where available.  Gives
   up (conservatively returning False) once more than NODE_LIMIT nodes
   have been visited in the current sameIRExprs invocation. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Bail out conservatively if the trees are too large. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* If both tmps are bound in |env|, compare what they are
            bound to instead. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller guarantees both expressions have the same IR type,
            hence the constants must have the same tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                              e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

/* Top-level entry point.  Resets the per-invocation node-visit
   counter, then compares structurally.  Conservative: a False answer
   does not imply the expressions differ; see the long comment
   above.  Both expressions must have the same IR type. */
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190 Currently unused; commented out to avoid compiler warning */ 1191 #if 0 1192 static Bool isZeroU64 ( IRExpr* e ) 1193 { 1194 return toBool( e->tag == Iex_Const 1195 && e->Iex.Const.con->tag == Ico_U64 1196 && e->Iex.Const.con->Ico.U64 == 0); 1197 } 1198 #endif 1199 1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */ 1201 static Bool isZeroV128 ( IRExpr* e ) 1202 { 1203 return toBool( e->tag == Iex_Const 1204 && e->Iex.Const.con->tag == Ico_V128 1205 && e->Iex.Const.con->Ico.V128 == 0x0000); 1206 } 1207 1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */ 1209 static Bool isZeroV256 ( IRExpr* e ) 1210 { 1211 return toBool( e->tag == Iex_Const 1212 && e->Iex.Const.con->tag == Ico_V256 1213 && e->Iex.Const.con->Ico.V256 == 0x00000000); 1214 } 1215 1216 /* Is this an integer constant with value 0 ? */ 1217 static Bool isZeroU ( IRExpr* e ) 1218 { 1219 if (e->tag != Iex_Const) return False; 1220 switch (e->Iex.Const.con->tag) { 1221 case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == 0); 1222 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0); 1223 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0); 1224 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0); 1225 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0); 1226 default: vpanic("isZeroU"); 1227 } 1228 } 1229 1230 /* Is this an integer constant with value 1---1b ? 
*/ 1231 static Bool isOnesU ( IRExpr* e ) 1232 { 1233 if (e->tag != Iex_Const) return False; 1234 switch (e->Iex.Const.con->tag) { 1235 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0xFF); 1236 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF); 1237 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 1238 == 0xFFFFFFFF); 1239 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 1240 == 0xFFFFFFFFFFFFFFFFULL); 1241 default: ppIRExpr(e); vpanic("isOnesU"); 1242 } 1243 } 1244 1245 static Bool notBool ( Bool b ) 1246 { 1247 if (b == True) return False; 1248 if (b == False) return True; 1249 vpanic("notBool"); 1250 } 1251 1252 /* Make a zero which has the same type as the result of the given 1253 primop. */ 1254 static IRExpr* mkZeroOfPrimopResultType ( IROp op ) 1255 { 1256 switch (op) { 1257 case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0))); 1258 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0)); 1259 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0)); 1260 case Iop_Sub32: 1261 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0)); 1262 case Iop_And64: 1263 case Iop_Sub64: 1264 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0)); 1265 case Iop_XorV128: 1266 case Iop_AndV128: return IRExpr_Const(IRConst_V128(0)); 1267 case Iop_XorV256: 1268 case Iop_AndV256: return IRExpr_Const(IRConst_V256(0)); 1269 default: vpanic("mkZeroOfPrimopResultType: bad primop"); 1270 } 1271 } 1272 1273 /* Make a value containing all 1-bits, which has the same type as the 1274 result of the given primop. 
*/ 1275 static IRExpr* mkOnesOfPrimopResultType ( IROp op ) 1276 { 1277 switch (op) { 1278 case Iop_CmpEQ32: 1279 case Iop_CmpEQ64: 1280 return IRExpr_Const(IRConst_U1(toBool(1))); 1281 case Iop_Or8: 1282 return IRExpr_Const(IRConst_U8(0xFF)); 1283 case Iop_Or16: 1284 return IRExpr_Const(IRConst_U16(0xFFFF)); 1285 case Iop_Or32: 1286 return IRExpr_Const(IRConst_U32(0xFFFFFFFF)); 1287 case Iop_CmpEQ8x8: 1288 case Iop_Or64: 1289 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL)); 1290 case Iop_CmpEQ8x16: 1291 case Iop_CmpEQ16x8: 1292 case Iop_CmpEQ32x4: 1293 return IRExpr_Const(IRConst_V128(0xFFFF)); 1294 default: 1295 ppIROp(op); 1296 vpanic("mkOnesOfPrimopResultType: bad primop"); 1297 } 1298 } 1299 1300 /* Helpers for folding Clz32/64. */ 1301 static UInt fold_Clz64 ( ULong value ) 1302 { 1303 UInt i; 1304 vassert(value != 0ULL); /* no defined semantics for arg==0 */ 1305 for (i = 0; i < 64; ++i) { 1306 if (0ULL != (value & (((ULong)1) << (63 - i)))) return i; 1307 } 1308 vassert(0); 1309 /*NOTREACHED*/ 1310 return 0; 1311 } 1312 1313 static UInt fold_Clz32 ( UInt value ) 1314 { 1315 UInt i; 1316 vassert(value != 0); /* no defined semantics for arg==0 */ 1317 for (i = 0; i < 32; ++i) { 1318 if (0 != (value & (((UInt)1) << (31 - i)))) return i; 1319 } 1320 vassert(0); 1321 /*NOTREACHED*/ 1322 return 0; 1323 } 1324 1325 /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to 1326 the corresponding real constant. 
*/ 1327 //XXX re-check this before use 1328 //static ULong de_summarise_V64 ( UChar v64 ) 1329 //{ 1330 // ULong r = 0; 1331 // if (v64 & (1<<0)) r |= 0x00000000000000FFULL; 1332 // if (v64 & (1<<1)) r |= 0x000000000000FF00ULL; 1333 // if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL; 1334 // if (v64 & (1<<3)) r |= 0x00000000FF000000ULL; 1335 // if (v64 & (1<<4)) r |= 0x000000FF00000000ULL; 1336 // if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL; 1337 // if (v64 & (1<<6)) r |= 0x00FF000000000000ULL; 1338 // if (v64 & (1<<7)) r |= 0xFF00000000000000ULL; 1339 // return r; 1340 //} 1341 1342 /* Helper for arbitrary expression pattern matching in flat IR. If 1343 'e' is a reference to a tmp, look it up in env -- repeatedly, if 1344 necessary -- until it resolves to a non-tmp. Note that this can 1345 return NULL if it can't resolve 'e' to a new expression, which will 1346 be the case if 'e' is instead defined by an IRStmt (IRDirty or 1347 LLSC). */ 1348 static IRExpr* chase ( IRExpr** env, IRExpr* e ) 1349 { 1350 /* Why is this loop guaranteed to terminate? Because all tmps must 1351 have definitions before use, hence a tmp cannot be bound 1352 (directly or indirectly) to itself. */ 1353 while (e->tag == Iex_RdTmp) { 1354 if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); } 1355 e = env[(Int)e->Iex.RdTmp.tmp]; 1356 if (e == NULL) break; 1357 } 1358 return e; 1359 } 1360 1361 /* Similar to |chase|, but follows at most one level of tmp reference. 
*/ 1362 static IRExpr* chase1 ( IRExpr** env, IRExpr* e ) 1363 { 1364 if (e == NULL || e->tag != Iex_RdTmp) 1365 return e; 1366 else 1367 return env[(Int)e->Iex.RdTmp.tmp]; 1368 } 1369 1370 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) 1371 { 1372 Int shift; 1373 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */ 1374 1375 switch (e->tag) { 1376 case Iex_Unop: 1377 /* UNARY ops */ 1378 if (e->Iex.Unop.arg->tag == Iex_Const) { 1379 switch (e->Iex.Unop.op) { 1380 case Iop_1Uto8: 1381 e2 = IRExpr_Const(IRConst_U8(toUChar( 1382 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1383 ? 1 : 0))); 1384 break; 1385 case Iop_1Uto32: 1386 e2 = IRExpr_Const(IRConst_U32( 1387 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1388 ? 1 : 0)); 1389 break; 1390 case Iop_1Uto64: 1391 e2 = IRExpr_Const(IRConst_U64( 1392 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1393 ? 1 : 0)); 1394 break; 1395 1396 case Iop_1Sto8: 1397 e2 = IRExpr_Const(IRConst_U8(toUChar( 1398 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1399 ? 0xFF : 0))); 1400 break; 1401 case Iop_1Sto16: 1402 e2 = IRExpr_Const(IRConst_U16(toUShort( 1403 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1404 ? 0xFFFF : 0))); 1405 break; 1406 case Iop_1Sto32: 1407 e2 = IRExpr_Const(IRConst_U32( 1408 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1409 ? 0xFFFFFFFF : 0)); 1410 break; 1411 case Iop_1Sto64: 1412 e2 = IRExpr_Const(IRConst_U64( 1413 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1414 ? 
0xFFFFFFFFFFFFFFFFULL : 0)); 1415 break; 1416 1417 case Iop_8Sto32: { 1418 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8; 1419 u32 <<= 24; 1420 u32 = (Int)u32 >> 24; /* signed shift */ 1421 e2 = IRExpr_Const(IRConst_U32(u32)); 1422 break; 1423 } 1424 case Iop_16Sto32: { 1425 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1426 u32 <<= 16; 1427 u32 = (Int)u32 >> 16; /* signed shift */ 1428 e2 = IRExpr_Const(IRConst_U32(u32)); 1429 break; 1430 } 1431 case Iop_8Uto64: 1432 e2 = IRExpr_Const(IRConst_U64( 1433 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1434 break; 1435 case Iop_16Uto64: 1436 e2 = IRExpr_Const(IRConst_U64( 1437 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16)); 1438 break; 1439 case Iop_8Uto32: 1440 e2 = IRExpr_Const(IRConst_U32( 1441 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1442 break; 1443 case Iop_8Sto16: { 1444 UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8; 1445 u16 <<= 8; 1446 u16 = (Short)u16 >> 8; /* signed shift */ 1447 e2 = IRExpr_Const(IRConst_U16(u16)); 1448 break; 1449 } 1450 case Iop_8Uto16: 1451 e2 = IRExpr_Const(IRConst_U16( 1452 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1453 break; 1454 case Iop_16Uto32: 1455 e2 = IRExpr_Const(IRConst_U32( 1456 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16)); 1457 break; 1458 case Iop_32to16: 1459 e2 = IRExpr_Const(IRConst_U16(toUShort( 1460 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1461 break; 1462 case Iop_32to8: 1463 e2 = IRExpr_Const(IRConst_U8(toUChar( 1464 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1465 break; 1466 case Iop_32to1: 1467 e2 = IRExpr_Const(IRConst_U1(toBool( 1468 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32) 1469 ))); 1470 break; 1471 case Iop_64to1: 1472 e2 = IRExpr_Const(IRConst_U1(toBool( 1473 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64) 1474 ))); 1475 break; 1476 1477 case Iop_NotV128: 1478 e2 = IRExpr_Const(IRConst_V128( 1479 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128))); 1480 break; 1481 case Iop_Not64: 
1482 e2 = IRExpr_Const(IRConst_U64( 1483 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64))); 1484 break; 1485 case Iop_Not32: 1486 e2 = IRExpr_Const(IRConst_U32( 1487 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1488 break; 1489 case Iop_Not16: 1490 e2 = IRExpr_Const(IRConst_U16(toUShort( 1491 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16)))); 1492 break; 1493 case Iop_Not8: 1494 e2 = IRExpr_Const(IRConst_U8(toUChar( 1495 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8)))); 1496 break; 1497 1498 case Iop_Not1: 1499 e2 = IRExpr_Const(IRConst_U1( 1500 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1))); 1501 break; 1502 1503 case Iop_64to8: { 1504 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1505 w64 &= 0xFFULL; 1506 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 )); 1507 break; 1508 } 1509 case Iop_64to16: { 1510 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1511 w64 &= 0xFFFFULL; 1512 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 )); 1513 break; 1514 } 1515 case Iop_64to32: { 1516 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1517 w64 &= 0x00000000FFFFFFFFULL; 1518 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 )); 1519 break; 1520 } 1521 case Iop_64HIto32: { 1522 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1523 w64 >>= 32; 1524 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 )); 1525 break; 1526 } 1527 case Iop_32Uto64: 1528 e2 = IRExpr_Const(IRConst_U64( 1529 0xFFFFFFFFULL 1530 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)); 1531 break; 1532 case Iop_16Sto64: { 1533 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1534 u64 <<= 48; 1535 u64 = (Long)u64 >> 48; /* signed shift */ 1536 e2 = IRExpr_Const(IRConst_U64(u64)); 1537 break; 1538 } 1539 case Iop_32Sto64: { 1540 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1541 u64 <<= 32; 1542 u64 = (Long)u64 >> 32; /* signed shift */ 1543 e2 = IRExpr_Const(IRConst_U64(u64)); 1544 break; 1545 } 1546 1547 case Iop_16to8: { 1548 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1549 w16 &= 0xFF; 1550 e2 = 
IRExpr_Const(IRConst_U8( (UChar)w16 )); 1551 break; 1552 } 1553 case Iop_16HIto8: { 1554 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1555 w16 >>= 8; 1556 w16 &= 0xFF; 1557 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 )); 1558 break; 1559 } 1560 1561 case Iop_CmpNEZ8: 1562 e2 = IRExpr_Const(IRConst_U1(toBool( 1563 0 != 1564 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8) 1565 ))); 1566 break; 1567 case Iop_CmpNEZ32: 1568 e2 = IRExpr_Const(IRConst_U1(toBool( 1569 0 != 1570 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32) 1571 ))); 1572 break; 1573 case Iop_CmpNEZ64: 1574 e2 = IRExpr_Const(IRConst_U1(toBool( 1575 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64 1576 ))); 1577 break; 1578 1579 case Iop_CmpwNEZ32: { 1580 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1581 if (w32 == 0) 1582 e2 = IRExpr_Const(IRConst_U32( 0 )); 1583 else 1584 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF )); 1585 break; 1586 } 1587 case Iop_CmpwNEZ64: { 1588 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1589 if (w64 == 0) 1590 e2 = IRExpr_Const(IRConst_U64( 0 )); 1591 else 1592 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL )); 1593 break; 1594 } 1595 1596 case Iop_Left32: { 1597 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1598 Int s32 = (Int)(u32 & 0xFFFFFFFF); 1599 s32 = (s32 | (-s32)); 1600 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 )); 1601 break; 1602 } 1603 1604 case Iop_Left64: { 1605 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1606 Long s64 = (Long)u64; 1607 s64 = (s64 | (-s64)); 1608 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 )); 1609 break; 1610 } 1611 1612 case Iop_Clz32: { 1613 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1614 if (u32 != 0) 1615 e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32))); 1616 break; 1617 } 1618 case Iop_Clz64: { 1619 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1620 if (u64 != 0ULL) 1621 e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64))); 1622 break; 1623 } 1624 1625 /* For these vector ones, can't 
fold all cases, but at least 1626 do the most obvious one. Could do better here using 1627 summarise/desummarise of vector constants, but too 1628 difficult to verify; hence just handle the zero cases. */ 1629 case Iop_32UtoV128: { 1630 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1631 if (0 == u32) { 1632 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1633 } else { 1634 goto unhandled; 1635 } 1636 break; 1637 } 1638 case Iop_V128to64: { 1639 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1640 if (0 == ((v128 >> 0) & 0xFF)) { 1641 e2 = IRExpr_Const(IRConst_U64(0)); 1642 } else { 1643 goto unhandled; 1644 } 1645 break; 1646 } 1647 case Iop_V128HIto64: { 1648 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1649 if (0 == ((v128 >> 8) & 0xFF)) { 1650 e2 = IRExpr_Const(IRConst_U64(0)); 1651 } else { 1652 goto unhandled; 1653 } 1654 break; 1655 } 1656 case Iop_64UtoV128: { 1657 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1658 if (0 == u64) { 1659 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1660 } else { 1661 goto unhandled; 1662 } 1663 break; 1664 } 1665 1666 /* Even stupider (although still correct ..) */ 1667 case Iop_V256to64_0: case Iop_V256to64_1: 1668 case Iop_V256to64_2: case Iop_V256to64_3: { 1669 UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256; 1670 if (v256 == 0x00000000) { 1671 e2 = IRExpr_Const(IRConst_U64(0)); 1672 } else { 1673 goto unhandled; 1674 } 1675 break; 1676 } 1677 1678 case Iop_ZeroHI64ofV128: { 1679 /* Could do better here -- only need to look at the bottom 64 bits 1680 of the argument, really. 
*/ 1681 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1682 if (v128 == 0x0000) { 1683 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1684 } else { 1685 goto unhandled; 1686 } 1687 break; 1688 } 1689 1690 default: 1691 goto unhandled; 1692 } 1693 } 1694 break; 1695 1696 case Iex_Binop: 1697 /* BINARY ops */ 1698 if (e->Iex.Binop.arg1->tag == Iex_Const 1699 && e->Iex.Binop.arg2->tag == Iex_Const) { 1700 /* cases where both args are consts */ 1701 switch (e->Iex.Binop.op) { 1702 1703 /* -- Or -- */ 1704 case Iop_Or8: 1705 e2 = IRExpr_Const(IRConst_U8(toUChar( 1706 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1707 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1708 break; 1709 case Iop_Or16: 1710 e2 = IRExpr_Const(IRConst_U16(toUShort( 1711 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1712 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1713 break; 1714 case Iop_Or32: 1715 e2 = IRExpr_Const(IRConst_U32( 1716 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1717 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1718 break; 1719 case Iop_Or64: 1720 e2 = IRExpr_Const(IRConst_U64( 1721 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1722 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1723 break; 1724 case Iop_OrV128: 1725 e2 = IRExpr_Const(IRConst_V128( 1726 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1727 | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1728 break; 1729 1730 /* -- Xor -- */ 1731 case Iop_Xor8: 1732 e2 = IRExpr_Const(IRConst_U8(toUChar( 1733 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1734 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1735 break; 1736 case Iop_Xor16: 1737 e2 = IRExpr_Const(IRConst_U16(toUShort( 1738 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1739 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1740 break; 1741 case Iop_Xor32: 1742 e2 = IRExpr_Const(IRConst_U32( 1743 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1744 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1745 break; 1746 case Iop_Xor64: 1747 e2 = IRExpr_Const(IRConst_U64( 1748 
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1749 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1750 break; 1751 case Iop_XorV128: 1752 e2 = IRExpr_Const(IRConst_V128( 1753 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1754 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1755 break; 1756 1757 /* -- And -- */ 1758 case Iop_And8: 1759 e2 = IRExpr_Const(IRConst_U8(toUChar( 1760 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1761 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1762 break; 1763 case Iop_And16: 1764 e2 = IRExpr_Const(IRConst_U16(toUShort( 1765 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1766 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1767 break; 1768 case Iop_And32: 1769 e2 = IRExpr_Const(IRConst_U32( 1770 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1771 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1772 break; 1773 case Iop_And64: 1774 e2 = IRExpr_Const(IRConst_U64( 1775 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1776 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1777 break; 1778 case Iop_AndV128: 1779 e2 = IRExpr_Const(IRConst_V128( 1780 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1781 & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1782 break; 1783 1784 /* -- Add -- */ 1785 case Iop_Add8: 1786 e2 = IRExpr_Const(IRConst_U8(toUChar( 1787 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1788 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1789 break; 1790 case Iop_Add32: 1791 e2 = IRExpr_Const(IRConst_U32( 1792 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1793 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1794 break; 1795 case Iop_Add64: 1796 e2 = IRExpr_Const(IRConst_U64( 1797 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1798 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1799 break; 1800 1801 /* -- Sub -- */ 1802 case Iop_Sub8: 1803 e2 = IRExpr_Const(IRConst_U8(toUChar( 1804 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1805 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1806 break; 1807 case Iop_Sub32: 1808 e2 = IRExpr_Const(IRConst_U32( 1809 
(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1810 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1811 break; 1812 case Iop_Sub64: 1813 e2 = IRExpr_Const(IRConst_U64( 1814 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1815 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1816 break; 1817 1818 /* -- Max32U -- */ 1819 case Iop_Max32U: { 1820 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 1821 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1822 UInt res = u32a > u32b ? u32a : u32b; 1823 e2 = IRExpr_Const(IRConst_U32(res)); 1824 break; 1825 } 1826 1827 /* -- Mul -- */ 1828 case Iop_Mul32: 1829 e2 = IRExpr_Const(IRConst_U32( 1830 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1831 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1832 break; 1833 case Iop_Mul64: 1834 e2 = IRExpr_Const(IRConst_U64( 1835 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1836 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1837 break; 1838 1839 case Iop_MullS32: { 1840 /* very paranoid */ 1841 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 1842 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1843 Int s32a = (Int)u32a; 1844 Int s32b = (Int)u32b; 1845 Long s64a = (Long)s32a; 1846 Long s64b = (Long)s32b; 1847 Long sres = s64a * s64b; 1848 ULong ures = (ULong)sres; 1849 e2 = IRExpr_Const(IRConst_U64(ures)); 1850 break; 1851 } 1852 1853 /* -- Shl -- */ 1854 case Iop_Shl32: 1855 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1856 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1857 if (shift >= 0 && shift <= 31) 1858 e2 = IRExpr_Const(IRConst_U32( 1859 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1860 << shift))); 1861 break; 1862 case Iop_Shl64: 1863 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1864 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1865 if (shift >= 0 && shift <= 63) 1866 e2 = IRExpr_Const(IRConst_U64( 1867 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1868 << shift))); 1869 break; 1870 1871 /* -- Sar -- */ 1872 case Iop_Sar32: { 
1873 /* paranoid ... */ 1874 /*signed*/ Int s32; 1875 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1876 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32); 1877 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1878 if (shift >= 0 && shift <= 31) { 1879 s32 >>=/*signed*/ shift; 1880 e2 = IRExpr_Const(IRConst_U32((UInt)s32)); 1881 } 1882 break; 1883 } 1884 case Iop_Sar64: { 1885 /* paranoid ... */ 1886 /*signed*/ Long s64; 1887 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1888 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1889 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1890 if (shift >= 0 && shift <= 63) { 1891 s64 >>=/*signed*/ shift; 1892 e2 = IRExpr_Const(IRConst_U64((ULong)s64)); 1893 } 1894 break; 1895 } 1896 1897 /* -- Shr -- */ 1898 case Iop_Shr32: { 1899 /* paranoid ... */ 1900 /*unsigned*/ UInt u32; 1901 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1902 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32); 1903 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1904 if (shift >= 0 && shift <= 31) { 1905 u32 >>=/*unsigned*/ shift; 1906 e2 = IRExpr_Const(IRConst_U32(u32)); 1907 } 1908 break; 1909 } 1910 case Iop_Shr64: { 1911 /* paranoid ... 
*/ 1912 /*unsigned*/ ULong u64; 1913 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1914 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1915 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1916 if (shift >= 0 && shift <= 63) { 1917 u64 >>=/*unsigned*/ shift; 1918 e2 = IRExpr_Const(IRConst_U64(u64)); 1919 } 1920 break; 1921 } 1922 1923 /* -- CmpEQ -- */ 1924 case Iop_CmpEQ32: 1925 e2 = IRExpr_Const(IRConst_U1(toBool( 1926 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1927 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1928 break; 1929 case Iop_CmpEQ64: 1930 e2 = IRExpr_Const(IRConst_U1(toBool( 1931 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1932 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1933 break; 1934 1935 /* -- CmpNE -- */ 1936 case Iop_CmpNE8: 1937 case Iop_CasCmpNE8: 1938 case Iop_ExpCmpNE8: 1939 e2 = IRExpr_Const(IRConst_U1(toBool( 1940 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8) 1941 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))))); 1942 break; 1943 case Iop_CmpNE32: 1944 case Iop_CasCmpNE32: 1945 case Iop_ExpCmpNE32: 1946 e2 = IRExpr_Const(IRConst_U1(toBool( 1947 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1948 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1949 break; 1950 case Iop_CmpNE64: 1951 case Iop_CasCmpNE64: 1952 case Iop_ExpCmpNE64: 1953 e2 = IRExpr_Const(IRConst_U1(toBool( 1954 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1955 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1956 break; 1957 1958 /* -- CmpLEU -- */ 1959 case Iop_CmpLE32U: 1960 e2 = IRExpr_Const(IRConst_U1(toBool( 1961 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1962 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1963 break; 1964 case Iop_CmpLE64U: 1965 e2 = IRExpr_Const(IRConst_U1(toBool( 1966 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1967 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1968 break; 1969 1970 /* -- CmpLES -- */ 1971 case Iop_CmpLE32S: 1972 e2 = 
IRExpr_Const(IRConst_U1(toBool( 1973 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1974 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1975 break; 1976 case Iop_CmpLE64S: 1977 e2 = IRExpr_Const(IRConst_U1(toBool( 1978 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1979 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1980 break; 1981 1982 /* -- CmpLTS -- */ 1983 case Iop_CmpLT32S: 1984 e2 = IRExpr_Const(IRConst_U1(toBool( 1985 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1986 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1987 break; 1988 case Iop_CmpLT64S: 1989 e2 = IRExpr_Const(IRConst_U1(toBool( 1990 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1991 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1992 break; 1993 1994 /* -- CmpLTU -- */ 1995 case Iop_CmpLT32U: 1996 e2 = IRExpr_Const(IRConst_U1(toBool( 1997 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1998 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1999 break; 2000 case Iop_CmpLT64U: 2001 e2 = IRExpr_Const(IRConst_U1(toBool( 2002 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 2003 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 2004 break; 2005 2006 /* -- CmpORD -- */ 2007 case Iop_CmpORD32S: { 2008 /* very paranoid */ 2009 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 2010 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 2011 Int s32a = (Int)u32a; 2012 Int s32b = (Int)u32b; 2013 Int r = 0x2; /* EQ */ 2014 if (s32a < s32b) { 2015 r = 0x8; /* LT */ 2016 } 2017 else if (s32a > s32b) { 2018 r = 0x4; /* GT */ 2019 } 2020 e2 = IRExpr_Const(IRConst_U32(r)); 2021 break; 2022 } 2023 2024 /* -- nHLto2n -- */ 2025 case Iop_32HLto64: 2026 e2 = IRExpr_Const(IRConst_U64( 2027 (((ULong)(e->Iex.Binop.arg1 2028 ->Iex.Const.con->Ico.U32)) << 32) 2029 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)) 2030 )); 2031 break; 2032 case Iop_64HLto128: 2033 /* We can't fold this, because there is no way to 2034 express he result 
in IR, but at least pretend to 2035 handle it, so as to stop getting blasted with 2036 no-rule-for-this-primop messages. */ 2037 break; 2038 /* For this vector one, can't fold all cases, but at 2039 least do the most obvious one. Could do better here 2040 using summarise/desummarise of vector constants, but 2041 too difficult to verify; hence just handle the zero 2042 cases. */ 2043 case Iop_64HLtoV128: { 2044 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64; 2045 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2046 if (0 == argHi && 0 == argLo) { 2047 e2 = IRExpr_Const(IRConst_V128(0)); 2048 } else { 2049 goto unhandled; 2050 } 2051 break; 2052 } 2053 /* Same reasoning for the 256-bit version. */ 2054 case Iop_V128HLtoV256: { 2055 IRExpr* argHi = e->Iex.Binop.arg1; 2056 IRExpr* argLo = e->Iex.Binop.arg2; 2057 if (isZeroV128(argHi) && isZeroV128(argLo)) { 2058 e2 = IRExpr_Const(IRConst_V256(0)); 2059 } else { 2060 goto unhandled; 2061 } 2062 break; 2063 } 2064 2065 /* -- V128 stuff -- */ 2066 case Iop_InterleaveLO8x16: { 2067 /* This turns up a lot in Memcheck instrumentation of 2068 Icc generated code. I don't know why. 
*/ 2069 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128; 2070 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128; 2071 if (0 == arg1 && 0 == arg2) { 2072 e2 = IRExpr_Const(IRConst_V128(0)); 2073 } else { 2074 goto unhandled; 2075 } 2076 break; 2077 } 2078 2079 default: 2080 goto unhandled; 2081 } 2082 2083 } else { 2084 2085 /* other cases (identities, etc) */ 2086 switch (e->Iex.Binop.op) { 2087 2088 case Iop_Shl32: 2089 case Iop_Shl64: 2090 case Iop_Shr64: 2091 case Iop_Sar64: 2092 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */ 2093 if (isZeroU(e->Iex.Binop.arg2)) { 2094 e2 = e->Iex.Binop.arg1; 2095 break; 2096 } 2097 /* Shl32/Shl64/Shr64(0,x) ==> 0 */ 2098 if (isZeroU(e->Iex.Binop.arg1)) { 2099 e2 = e->Iex.Binop.arg1; 2100 break; 2101 } 2102 break; 2103 2104 case Iop_Sar32: 2105 case Iop_Shr32: 2106 /* Shr32/Sar32(x,0) ==> x */ 2107 if (isZeroU(e->Iex.Binop.arg2)) { 2108 e2 = e->Iex.Binop.arg1; 2109 break; 2110 } 2111 break; 2112 2113 case Iop_Or8: 2114 case Iop_Or16: 2115 case Iop_Or32: 2116 case Iop_Or64: 2117 case Iop_Max32U: 2118 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */ 2119 if (isZeroU(e->Iex.Binop.arg2)) { 2120 e2 = e->Iex.Binop.arg1; 2121 break; 2122 } 2123 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */ 2124 if (isZeroU(e->Iex.Binop.arg1)) { 2125 e2 = e->Iex.Binop.arg2; 2126 break; 2127 } 2128 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */ 2129 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */ 2130 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) { 2131 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2132 break; 2133 } 2134 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */ 2135 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2136 e2 = e->Iex.Binop.arg1; 2137 break; 2138 } 2139 break; 2140 2141 case Iop_Add8: 2142 /* Add8(t,t) ==> t << 1. 
2143 Memcheck doesn't understand that 2144 x+x produces a defined least significant bit, and it seems 2145 simplest just to get rid of the problem by rewriting it 2146 out, since the opportunity to do so exists. */ 2147 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2148 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1, 2149 IRExpr_Const(IRConst_U8(1))); 2150 break; 2151 } 2152 break; 2153 2154 /* NB no Add16(t,t) case yet as no known test case exists */ 2155 2156 case Iop_Add32: 2157 case Iop_Add64: 2158 /* Add32/Add64(x,0) ==> x */ 2159 if (isZeroU(e->Iex.Binop.arg2)) { 2160 e2 = e->Iex.Binop.arg1; 2161 break; 2162 } 2163 /* Add32/Add64(0,x) ==> x */ 2164 if (isZeroU(e->Iex.Binop.arg1)) { 2165 e2 = e->Iex.Binop.arg2; 2166 break; 2167 } 2168 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */ 2169 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2170 e2 = IRExpr_Binop( 2171 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64, 2172 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1))); 2173 break; 2174 } 2175 break; 2176 2177 case Iop_Sub32: 2178 case Iop_Sub64: 2179 /* Sub32/Sub64(x,0) ==> x */ 2180 if (isZeroU(e->Iex.Binop.arg2)) { 2181 e2 = e->Iex.Binop.arg1; 2182 break; 2183 } 2184 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */ 2185 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2186 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2187 break; 2188 } 2189 break; 2190 case Iop_Sub8x16: 2191 /* Sub8x16(x,0) ==> x */ 2192 if (isZeroV128(e->Iex.Binop.arg2)) { 2193 e2 = e->Iex.Binop.arg1; 2194 break; 2195 } 2196 break; 2197 2198 case Iop_And8: 2199 case Iop_And16: 2200 case Iop_And32: 2201 case Iop_And64: 2202 /* And8/And16/And32/And64(x,1---1b) ==> x */ 2203 if (isOnesU(e->Iex.Binop.arg2)) { 2204 e2 = e->Iex.Binop.arg1; 2205 break; 2206 } 2207 /* And8/And16/And32/And64(1---1b,x) ==> x */ 2208 if (isOnesU(e->Iex.Binop.arg1)) { 2209 e2 = e->Iex.Binop.arg2; 2210 break; 2211 } 2212 /* And8/And16/And32/And64(x,0) ==> 0 */ 
2213 if (isZeroU(e->Iex.Binop.arg2)) { 2214 e2 = e->Iex.Binop.arg2; 2215 break; 2216 } 2217 /* And8/And16/And32/And64(0,x) ==> 0 */ 2218 if (isZeroU(e->Iex.Binop.arg1)) { 2219 e2 = e->Iex.Binop.arg1; 2220 break; 2221 } 2222 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */ 2223 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2224 e2 = e->Iex.Binop.arg1; 2225 break; 2226 } 2227 break; 2228 2229 case Iop_AndV128: 2230 case Iop_AndV256: 2231 /* And8/And16/AndV128/AndV256(t,t) 2232 ==> t, for some IRTemp t */ 2233 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2234 e2 = e->Iex.Binop.arg1; 2235 break; 2236 } 2237 /* Deal with either arg zero. Could handle other And 2238 cases here too. */ 2239 if (e->Iex.Binop.op == Iop_AndV256 2240 && (isZeroV256(e->Iex.Binop.arg1) 2241 || isZeroV256(e->Iex.Binop.arg2))) { 2242 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2243 break; 2244 } else if (e->Iex.Binop.op == Iop_AndV128 2245 && (isZeroV128(e->Iex.Binop.arg1) 2246 || isZeroV128(e->Iex.Binop.arg2))) { 2247 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2248 break; 2249 } 2250 break; 2251 2252 case Iop_OrV128: 2253 case Iop_OrV256: 2254 /* V128/V256(t,t) ==> t, for some IRTemp t */ 2255 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2256 e2 = e->Iex.Binop.arg1; 2257 break; 2258 } 2259 /* OrV128(t,0) ==> t */ 2260 if (e->Iex.Binop.op == Iop_OrV128) { 2261 if (isZeroV128(e->Iex.Binop.arg2)) { 2262 e2 = e->Iex.Binop.arg1; 2263 break; 2264 } 2265 if (isZeroV128(e->Iex.Binop.arg1)) { 2266 e2 = e->Iex.Binop.arg2; 2267 break; 2268 } 2269 } 2270 /* OrV256(t,0) ==> t */ 2271 if (e->Iex.Binop.op == Iop_OrV256) { 2272 if (isZeroV256(e->Iex.Binop.arg2)) { 2273 e2 = e->Iex.Binop.arg1; 2274 break; 2275 } 2276 //Disabled because there's no known test case right now. 
2277 //if (isZeroV256(e->Iex.Binop.arg1)) { 2278 // e2 = e->Iex.Binop.arg2; 2279 // break; 2280 //} 2281 } 2282 break; 2283 2284 case Iop_Xor8: 2285 case Iop_Xor16: 2286 case Iop_Xor32: 2287 case Iop_Xor64: 2288 case Iop_XorV128: 2289 case Iop_XorV256: 2290 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */ 2291 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2292 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2293 break; 2294 } 2295 /* XorV128(t,0) ==> t */ 2296 if (e->Iex.Binop.op == Iop_XorV128) { 2297 if (isZeroV128(e->Iex.Binop.arg2)) { 2298 e2 = e->Iex.Binop.arg1; 2299 break; 2300 } 2301 //Disabled because there's no known test case right now. 2302 //if (isZeroV128(e->Iex.Binop.arg1)) { 2303 // e2 = e->Iex.Binop.arg2; 2304 // break; 2305 //} 2306 } else { 2307 /* Xor8/16/32/64(0,t) ==> t */ 2308 if (isZeroU(e->Iex.Binop.arg1)) { 2309 e2 = e->Iex.Binop.arg2; 2310 break; 2311 } 2312 /* Xor8/16/32/64(t,0) ==> t */ 2313 if (isZeroU(e->Iex.Binop.arg2)) { 2314 e2 = e->Iex.Binop.arg1; 2315 break; 2316 } 2317 } 2318 break; 2319 2320 case Iop_CmpNE32: 2321 /* CmpNE32(t,t) ==> 0, for some IRTemp t */ 2322 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2323 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2324 break; 2325 } 2326 /* CmpNE32(1Uto32(b), 0) ==> b */ 2327 if (isZeroU32(e->Iex.Binop.arg2)) { 2328 IRExpr* a1 = chase(env, e->Iex.Binop.arg1); 2329 if (a1 && a1->tag == Iex_Unop 2330 && a1->Iex.Unop.op == Iop_1Uto32) { 2331 e2 = a1->Iex.Unop.arg; 2332 break; 2333 } 2334 } 2335 break; 2336 2337 case Iop_CmpEQ32: 2338 case Iop_CmpEQ64: 2339 case Iop_CmpEQ8x8: 2340 case Iop_CmpEQ8x16: 2341 case Iop_CmpEQ16x8: 2342 case Iop_CmpEQ32x4: 2343 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2344 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2345 break; 2346 } 2347 break; 2348 2349 default: 2350 break; 2351 } 2352 } 2353 break; 2354 2355 case Iex_ITE: 2356 /* ITE */ 2357 /* is the discriminant is a constant? 
*/ 2358 if (e->Iex.ITE.cond->tag == Iex_Const) { 2359 /* assured us by the IR type rules */ 2360 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1); 2361 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1 2362 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse; 2363 } 2364 else 2365 /* are the arms identical? (pretty weedy test) */ 2366 if (sameIRExprs(env, e->Iex.ITE.iftrue, 2367 e->Iex.ITE.iffalse)) { 2368 e2 = e->Iex.ITE.iffalse; 2369 } 2370 break; 2371 2372 default: 2373 /* not considered */ 2374 break; 2375 } 2376 2377 /* Show cases where we've found but not folded 'op(t,t)'. Be 2378 careful not to call sameIRExprs with values of different types, 2379 though, else it will assert (and so it should!). We can't 2380 conveniently call typeOfIRExpr on the two args without a whole 2381 bunch of extra plumbing to pass in a type env, so just use a 2382 hacky test to check the arguments are not anything that might 2383 sameIRExprs to assert. This is only OK because this kludge is 2384 only used for debug printing, not for "real" operation. For 2385 "real" operation (ie, all other calls to sameIRExprs), it is 2386 essential that the to args have the same type. 2387 2388 The "right" solution is to plumb the containing block's 2389 IRTypeEnv through to here and use typeOfIRExpr to be sure. But 2390 that's a bunch of extra parameter passing which will just slow 2391 down the normal case, for no purpose. */ 2392 if (vex_control.iropt_verbosity > 0 2393 && e == e2 2394 && e->tag == Iex_Binop 2395 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1, 2396 e->Iex.Binop.arg2) 2397 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2398 vex_printf("vex iropt: fold_Expr: no ident rule for: "); 2399 ppIRExpr(e); 2400 vex_printf("\n"); 2401 } 2402 2403 /* Show the overall results of folding. 
*/
   /* Debug-only: show the overall result of the fold when it actually
      changed something. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf(" -> ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

  unhandled:
# if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
# else
   /* No constant-folding rule applied; optionally complain, then hand
      back whatever e2 currently holds. */
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no const rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
# endif
}


/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */
/* 'env' maps IRTemp numbers to replacement IRExpr*s (NULL meaning "no
   binding").  Returns either 'ex' itself or a freshly constructed
   expression; 'ex' is never modified in place. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp:
         /* Copy/constant-propagate through a bound temp.  F64i
            constants are deliberately not propagated -- see the note
            in cprop_BB about keeping them CSE-able. */
         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
            IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
            if (rhs->tag == Iex_RdTmp)
               return rhs;
            if (rhs->tag == Iex_Const
                && rhs->Iex.Const.con->tag != Ico_F64i)
               return rhs;
         }
         /* not bound in env */
         return ex;

      case Iex_Const:
      case Iex_Get:
         /* Nothing to substitute inside these. */
         return ex;

      case Iex_GetI:
         /* Flattening guarantees the index is an atom (const or tmp). */
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(
            ex->Iex.GetI.descr,
            subst_Expr(env, ex->Iex.GetI.ix),
            ex->Iex.GetI.bias
         );

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         vassert(isIRAtom(qop->arg1));
         vassert(isIRAtom(qop->arg2));
         vassert(isIRAtom(qop->arg3));
         vassert(isIRAtom(qop->arg4));
         return IRExpr_Qop(
            qop->op,
            subst_Expr(env, qop->arg1),
            subst_Expr(env, qop->arg2),
            subst_Expr(env, qop->arg3),
            subst_Expr(env, qop->arg4)
         );
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         vassert(isIRAtom(triop->arg1));
         vassert(isIRAtom(triop->arg2));
         vassert(isIRAtom(triop->arg3));
         return IRExpr_Triop(
            triop->op,
            subst_Expr(env, triop->arg1),
            subst_Expr(env, triop->arg2),
            subst_Expr(env, triop->arg3)
         );
      }

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(
            ex->Iex.Binop.op,
            subst_Expr(env, ex->Iex.Binop.arg1),
            subst_Expr(env, ex->Iex.Binop.arg2)
         );

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(
            ex->Iex.Unop.op,
            subst_Expr(env, ex->Iex.Unop.arg)
         );

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(
            ex->Iex.Load.end,
            ex->Iex.Load.ty,
            subst_Expr(env, ex->Iex.Load.addr)
         );

      case Iex_CCall: {
         /* Substitute each argument; the vector is shallow-copied so
            the original expression's arg vector is left intact. */
         Int      i;
         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; args2[i]; i++) {
            vassert(isIRAtom(args2[i]));
            args2[i] = subst_Expr(env, args2[i]);
         }
         return IRExpr_CCall(
            ex->Iex.CCall.cee,
            ex->Iex.CCall.retty,
            args2
         );
      }

      case Iex_ITE:
         vassert(isIRAtom(ex->Iex.ITE.cond));
         vassert(isIRAtom(ex->Iex.ITE.iftrue));
         vassert(isIRAtom(ex->Iex.ITE.iffalse));
         return IRExpr_ITE(
            subst_Expr(env, ex->Iex.ITE.cond),
            subst_Expr(env, ex->Iex.ITE.iftrue),
            subst_Expr(env, ex->Iex.ITE.iffalse)
         );

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");

   }
}


/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.
*/
/* 'env' is the tmp -> IRExpr substitution built up by cprop_BB;
   'st' is the (flattened) statement to rewrite.  Always returns a
   newly constructed statement, never 'st' itself. */
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
# if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
# endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         /* Guarded store: if the guard folds to a constant, degrade
            to a no-op (guard false) or a plain store (guard true). */
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         /* expdHi/dataHi are NULL for a single-element CAS; keep them
            NULL rather than substituting through them. */
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* storedata == NULL marks a load-linked; non-NULL marks a
            store-conditional. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         /* Clone the dirty call and substitute/fold through mAddr,
            guard and all ordinary args (VECRET/BBPTR markers are left
            alone). */
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                            st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}


/* Constant/copy propagation and folding over a flattened SSA-form
   IRSB.  Builds and returns a fresh IRSB; the statements of 'in' are
   read but not written. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int       fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int       n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs.  The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op?
*/
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment.  This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form: each temp must be written at most once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it.  If the
                  fixups table is full the LoadG is simply left as-is,
                  which is safe, just less optimal. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

         default:
            break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

# if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
# endif

   /* The block-end expression also gets the substitution (but no
      folding). */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}


/*---------------------------------------------------------------*/
/*--- Dead code (t = E) removal                               ---*/
/*---------------------------------------------------------------*/

/* As a side effect, also removes all code following an unconditional
   side exit. */

/* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just operating a set of IRTemps.
*/

/* Mark 'tmp' as used.  'set' is a Bool array indexed by IRTemp
   number, of size tyenv->types_used. */
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}

/* Record in 'set' every IRTemp mentioned anywhere inside 'e'. */
static void addUses_Expr ( Bool* set, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_GetI:
         addUses_Expr(set, e->Iex.GetI.ix);
         return;
      case Iex_ITE:
         addUses_Expr(set, e->Iex.ITE.cond);
         addUses_Expr(set, e->Iex.ITE.iftrue);
         addUses_Expr(set, e->Iex.ITE.iffalse);
         return;
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            addUses_Expr(set, e->Iex.CCall.args[i]);
         return;
      case Iex_Load:
         addUses_Expr(set, e->Iex.Load.addr);
         return;
      case Iex_Qop:
         addUses_Expr(set, e->Iex.Qop.details->arg1);
         addUses_Expr(set, e->Iex.Qop.details->arg2);
         addUses_Expr(set, e->Iex.Qop.details->arg3);
         addUses_Expr(set, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         addUses_Expr(set, e->Iex.Triop.details->arg1);
         addUses_Expr(set, e->Iex.Triop.details->arg2);
         addUses_Expr(set, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         addUses_Expr(set, e->Iex.Binop.arg1);
         addUses_Expr(set, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         addUses_Expr(set, e->Iex.Unop.arg);
         return;
      case Iex_RdTmp:
         addUses_Temp(set, e->Iex.RdTmp.tmp);
         return;
      case Iex_Const:
      case Iex_Get:
         /* No temps inside these. */
         return;
      default:
         vex_printf("\n");
         ppIRExpr(e);
         vpanic("addUses_Expr");
   }
}

/* Record in 'set' every IRTemp read by statement 'st'.  Note this
   deliberately ignores temps *written* by the statement (WrTmp.tmp,
   LoadG dst, CAS old fields, etc.) -- only uses count. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.details->ix);
         addUses_Expr(set, st->Ist.PutI.details->data);
         return;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi may be NULL (single-element CAS). */
         cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      case Ist_LLSC:
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               addUses_Expr(set, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These mention no temps. */
         return;
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}


/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
static Bool isZeroU1 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U1
                  && e->Iex.Const.con->Ico.U1 == False );
}

/* Is this literally IRExpr_Const(IRConst_U1(True)) ?
*/
static Bool isOneU1 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U1
                  && e->Iex.Const.con->Ico.U1 == True );
}


/* Note, this destructively modifies the given IRSB. */

/* Scan backwards through statements, carrying a set of IRTemps which
   are known to be used after the current point.  On encountering 't =
   E', delete the binding if it is not used.  Otherwise, add any temp
   uses to the set and keep on moving backwards.

   As an enhancement, the first (backwards) pass searches for IR exits
   with always-taken conditions and notes the location of the earliest
   one in the block.  If any such are found, a second pass copies the
   exit destination and jump kind to the bb-end.  Then, the exit and
   all statements following it are turned into no-ops.
*/

/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; the loop runs backwards,
         so this ends up holding the EARLIEST such exit. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
         /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
      }
      else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Promote the exit's target/jumpkind to the block end ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      /* ... and no-op the exit and everything after it. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}


/*---------------------------------------------------------------*/
/*--- Specialisation of helper function calls, in             ---*/
/*--- collaboration with the front end                        ---*/
/*---------------------------------------------------------------*/

/* For each 't = CCall(...)' in 'bb', offer the call to the front
   end's 'specHelper' callback, which may return a cheaper equivalent
   IRExpr (or NULL to decline).  The callback is also given the
   statement array and current index, presumably so it can inspect
   preceding statements -- confirm against the front-end
   implementations. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
         /* the
front end can't think of a suitable replacement */ 3194 continue; 3195 3196 /* We got something better. Install it in the bb. */ 3197 any = True; 3198 bb->stmts[i] 3199 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 3200 3201 if (0) { 3202 vex_printf("SPEC: "); 3203 ppIRExpr(st->Ist.WrTmp.data); 3204 vex_printf(" --> "); 3205 ppIRExpr(ex); 3206 vex_printf("\n"); 3207 } 3208 } 3209 3210 if (any) 3211 bb = flatten_BB(bb); 3212 return bb; 3213 } 3214 3215 3216 /*---------------------------------------------------------------*/ 3217 /*--- Determination of guest state aliasing relationships ---*/ 3218 /*---------------------------------------------------------------*/ 3219 3220 /* These are helper functions for CSE and GetI/PutI transformations. 3221 3222 Determine, to the extent possible, the relationship between two 3223 guest state accesses. The possible outcomes are: 3224 3225 * Exact alias. These two accesses denote precisely the same 3226 piece of the guest state. 3227 3228 * Definitely no alias. These two accesses are guaranteed not to 3229 overlap any part of the guest state. 3230 3231 * Unknown -- if neither of the above can be established. 3232 3233 If in doubt, return Unknown. */ 3234 3235 typedef 3236 enum { ExactAlias, NoAlias, UnknownAlias } 3237 GSAliasing; 3238 3239 3240 /* Produces the alias relation between an indexed guest 3241 state access and a non-indexed access. */ 3242 3243 static 3244 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1, 3245 Int offset2, IRType ty2 ) 3246 { 3247 UInt minoff1, maxoff1, minoff2, maxoff2; 3248 3249 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3250 minoff2 = offset2; 3251 maxoff2 = minoff2 + sizeofIRType(ty2) - 1; 3252 3253 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3254 return NoAlias; 3255 3256 /* Could probably do better here if required. For the moment 3257 however just claim not to know anything more. 
   */
   return UnknownAlias;
}


/* Produces the alias relation between two indexed guest state
   accesses. */

static
GSAliasing getAliasingRelation_II ( 
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases into non-negative territory by adding whole
      multiples of the array size; bail out if that takes absurdly
      many iterations. */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range 
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}


/*---------------------------------------------------------------*/
/*--- Common Subexpression Elimination                        ---*/
/*---------------------------------------------------------------*/

/* Expensive in time and space. */

/* Uses two environments: 
   a IRTemp -> IRTemp mapping 
   a mapping from AvailExpr* to IRTemp 
*/

/* Either an IRTemp or an IRConst* -- the only two forms an atom in
   flattened IR can take. */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;

/* Structural equality on TmpOrConsts. */
static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
{
   if (tc1->tag != tc2->tag)
      return False;
   switch (tc1->tag) {
      case TCc:
         return eqIRConst(tc1->u.con, tc2->u.con);
      case TCt:
         return tc1->u.tmp == tc2->u.tmp;
      default:
         vpanic("eqTmpOrConst");
   }
}

/* Callees are compared by code address alone; equal addresses are
   asserted to imply equal regparms/mcx_mask. */
static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
{
   Bool eq = cee1->addr == cee2->addr;
   if (eq) {
      vassert(cee1->regparms == cee2->regparms);
      vassert(cee1->mcx_mask == cee2->mcx_mask);
      /* Names should be the same too, but we don't bother to
         check. */
   }
   return eq;
}

/* Convert an atomic IRExpr* to a TmpOrConst.
   */
static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
{
   switch (e->tag) {
      case Iex_RdTmp:
         tc->tag   = TCt;
         tc->u.tmp = e->Iex.RdTmp.tmp;
         break;
      case Iex_Const:
         tc->tag   = TCc;
         tc->u.con = e->Iex.Const.con;
         break;
      default:
         /* Getting here is a serious error.  It means that the
            presented arg isn't an IR atom, as it should be. */
         vpanic("irExpr_to_TmpOrConst");
   }
}

/* Convert a TmpOrConst to an atomic IRExpr*. */
static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
{
   switch (tc->tag) {
      case TCc: return IRExpr_Const(tc->u.con);
      case TCt: return IRExpr_RdTmp(tc->u.tmp);
      default:  vpanic("tmpOrConst_to_IRExpr");
   }
}

/* Convert a NULL terminated IRExpr* vector to an array of
   TmpOrConsts, and a length. */
static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
                                       /*OUT*/Int* nOuts,
                                       IRExpr** ins )
{
   Int i, n;
   /* We have to make two passes, one to count, one to copy. */
   for (n = 0; ins[n]; n++)
      ;
   *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
   *nOuts = n;
   /* and now copy .. */
   for (i = 0; i < n; i++) {
      IRExpr*     arg = ins[i];
      TmpOrConst* dst = &(*outs)[i];
      irExpr_to_TmpOrConst(dst, arg);
   }
}

/* An "available expression": one of the limited set of expression
   shapes tracked by the CSE pass.  All operands are atoms (tmps or
   constants), reflecting flattened IR; the tag encodes both the
   expression kind and which operand positions hold constants. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness   end;
            IRType      ty;
            TmpOrConst  addr;
         } Load;
      } u;
   }
   AvailExpr;

/* Structural equality on AvailExprs. */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   /* Differing tags can never be equal; marked LIKELY as the
      expected common outcome when scanning many candidates. */
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut: 
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op 
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt: 
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc: 
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct: 
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i: 
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt: 
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt: 
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         /* Same callee and args should imply same return type. */
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}

/* Reconstruct a (flat) IRExpr from an AvailExpr.  Constants that
   AvailExprs hold by value are copied into freshly allocated IRConst
   nodes. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1), 
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con), 
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co), 
                           IRExpr_RdTmp(ae->u.Ittt.e1), 
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co), 
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co), 
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co), 
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         /* Rebuild the NULL-terminated argument vector. */
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}

/* Look |tmp| up in the tmp->tmp substitution |env|; if unbound,
   return it unchanged. */
inline
static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
{
   HWord res;
   /* env :: IRTemp -> IRTemp */
   if (lookupHHW( env, &res, (HWord)tmp ))
      return (IRTemp)res;
   else
      return tmp;
}

/* Apply the tmp->tmp substitution to a TmpOrConst, in place;
   constants are left untouched. */
inline
static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
                                          HashHW* env )
{
   /* env :: IRTemp -> IRTemp */
   if (tc->tag == TCt) {
      tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   }
}

/* Apply the tmp->tmp substitution |env| to every tmp operand of
   |ae|, in place. */
static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
{
   /* env :: IRTemp -> IRTemp */
   switch (ae->tag) {
      case Ut:
         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
         break;
      case Btt:
         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
         break;
      case Btc:
         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
         break;
      case Bct:
         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
         break;
      case Cf64i:
         break;
      case Ittt:
         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
         break;
      case Ittc:
         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
         break;
      case Itct:
         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
         break;
      case Itcc:
         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
         break;
      case GetIt:
         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
break; 3708 case CCall: { 3709 Int i, n = ae->u.CCall.nArgs;; 3710 for (i = 0; i < n; i++) { 3711 subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env); 3712 } 3713 break; 3714 } 3715 case Load: 3716 subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env); 3717 break; 3718 default: 3719 vpanic("subst_AvailExpr"); 3720 } 3721 } 3722 3723 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd ) 3724 { 3725 AvailExpr* ae; 3726 3727 switch (e->tag) { 3728 case Iex_Unop: 3729 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 3730 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3731 ae->tag = Ut; 3732 ae->u.Ut.op = e->Iex.Unop.op; 3733 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp; 3734 return ae; 3735 } 3736 break; 3737 3738 case Iex_Binop: 3739 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) { 3740 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3741 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3742 ae->tag = Btt; 3743 ae->u.Btt.op = e->Iex.Binop.op; 3744 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3745 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3746 return ae; 3747 } 3748 if (e->Iex.Binop.arg2->tag == Iex_Const) { 3749 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3750 ae->tag = Btc; 3751 ae->u.Btc.op = e->Iex.Binop.op; 3752 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3753 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con); 3754 return ae; 3755 } 3756 } else if (e->Iex.Binop.arg1->tag == Iex_Const 3757 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3758 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3759 ae->tag = Bct; 3760 ae->u.Bct.op = e->Iex.Binop.op; 3761 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3762 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 3763 return ae; 3764 } 3765 break; 3766 3767 case Iex_Const: 3768 if (e->Iex.Const.con->tag == Ico_F64i) { 3769 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3770 ae->tag = Cf64i; 3771 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 3772 return ae; 3773 } 3774 break; 3775 3776 case Iex_ITE: 3777 if 
(e->Iex.ITE.cond->tag == Iex_RdTmp) { 3778 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) { 3779 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3780 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3781 ae->tag = Ittt; 3782 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3783 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3784 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3785 return ae; 3786 } 3787 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3788 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3789 ae->tag = Itct; 3790 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3791 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3792 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3793 return ae; 3794 } 3795 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) { 3796 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3797 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3798 ae->tag = Ittc; 3799 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3800 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3801 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3802 return ae; 3803 } 3804 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3805 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3806 ae->tag = Itcc; 3807 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3808 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3809 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3810 return ae; 3811 } 3812 } 3813 } 3814 break; 3815 3816 case Iex_GetI: 3817 if (e->Iex.GetI.ix->tag == Iex_RdTmp) { 3818 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3819 ae->tag = GetIt; 3820 ae->u.GetIt.descr = e->Iex.GetI.descr; 3821 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 3822 ae->u.GetIt.bias = e->Iex.GetI.bias; 3823 return ae; 3824 } 3825 break; 3826 3827 case Iex_CCall: 3828 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3829 ae->tag = CCall; 3830 /* Ok to share only the cee, since it is immutable. 
*/ 3831 ae->u.CCall.cee = e->Iex.CCall.cee; 3832 ae->u.CCall.retty = e->Iex.CCall.retty; 3833 /* irExprVec_to_TmpOrConsts will assert if the args are 3834 neither tmps nor constants, but that's ok .. that's all they 3835 should be. */ 3836 irExprVec_to_TmpOrConsts( 3837 &ae->u.CCall.args, &ae->u.CCall.nArgs, 3838 e->Iex.CCall.args 3839 ); 3840 return ae; 3841 3842 case Iex_Load: 3843 /* If the caller of do_cse_BB has requested that loads also 3844 be CSEd, convert them into AvailExprs. If not, we'll just 3845 return NULL here, and the load never becomes considered 3846 "available", which effectively disables CSEing of them, as 3847 desired. */ 3848 if (allowLoadsToBeCSEd) { 3849 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3850 ae->tag = Load; 3851 ae->u.Load.end = e->Iex.Load.end; 3852 ae->u.Load.ty = e->Iex.Load.ty; 3853 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr); 3854 return ae; 3855 } 3856 break; 3857 3858 default: 3859 break; 3860 } 3861 3862 return NULL; 3863 } 3864 3865 3866 /* The BB is modified in-place. Returns True if any changes were 3867 made. The caller can choose whether or not loads should be CSEd. 3868 In the normal course of things we don't do that, since CSEing loads 3869 is something of a dodgy proposition if the guest program is doing 3870 some screwy stuff to do with races and spinloops. */ 3871 3872 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) 3873 { 3874 Int i, j, paranoia; 3875 IRTemp t, q; 3876 IRStmt* st; 3877 AvailExpr* eprime; 3878 AvailExpr* ae; 3879 Bool invalidate; 3880 Bool anyDone = False; 3881 3882 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */ 3883 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */ 3884 3885 vassert(sizeof(IRTemp) <= sizeof(HWord)); 3886 3887 if (0) { ppIRSB(bb); vex_printf("\n\n"); } 3888 3889 /* Iterate forwards over the stmts. 
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E' 
            if a mapping E' -> q is found, 
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2) 
            Store      - dump (ditto) 
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI: 
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: 
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default: 
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* Only GetI and Load bindings can be invalidated. */
            if (ae->tag != GetIt && ae->tag != Load) 
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr, 
                         IRExpr_RdTmp(ae->u.GetIt.ix), 
                         st->Ist.Put.offset, 
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data) 
                      ) != NoAlias) 
                     invalidate = True;
               }
               else 
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr, 
                         IRExpr_RdTmp(ae->u.GetIt.ix), 
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else 
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j] = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}


/*---------------------------------------------------------------*/
/*--- Add32/Sub32 chain collapsing                            ---*/
/*---------------------------------------------------------------*/

/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */

/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   root node is Add32, not Sub32: a Sub32 is reported as an add of
   the negated constant. */

static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
{
   if (e->tag != Iex_Binop)
      return False;
   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
      return False;
   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
      return False;
   if (e->Iex.Binop.arg2->tag != Iex_Const)
      return False;
   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   if (e->Iex.Binop.op == Iop_Sub32)
      *i32 = -*i32;
   return True;
}


/* Figure out if tmp can be expressed as tmp2 +32 const, for some
   other tmp2.  Scan backwards from the specified start point -- an
   optimisation.
   */

static Bool collapseChain ( IRSB* bb, Int startHere,
                            IRTemp tmp,
                            IRTemp* tmp2, Int* i32 )
{
   Int     j, ii;
   IRTemp  vv;
   IRStmt* st;
   IRExpr* e;

   /* the (var, con) pair contain the current 'representation' for
      'tmp'.  We start with 'tmp + 0'.  */
   IRTemp var = tmp;
   Int    con = 0;

   /* Scan backwards to see if tmp can be replaced by some other tmp
      +/- a constant. */
   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag != Ist_WrTmp) 
         continue;
      if (st->Ist.WrTmp.tmp != var)
         continue;
      e = st->Ist.WrTmp.data;
      if (!isAdd32OrSub32(e, &vv, &ii))
         break;
      /* Found "var = vv +32 ii"; fold it into the representation and
         keep walking back from vv. */
      var = vv;
      con += ii;
   }
   if (j == -1)
      /* no earlier binding for var .. ill-formed IR */
      vpanic("collapseChain");

   /* so, did we find anything interesting? */
   if (var == tmp)
      return False; /* no .. */

   *tmp2 = var;
   *i32  = con;
   return True;
}


/* ------- Main function for Add32/Sub32 chain collapsing ------ */

/* Rewrite, in place, Add32/Sub32 chains feeding WrTmp, GetI and PutI
   statements into a single "base tmp + constant" form. */
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            /* Emit Add32 for a non-negative combined constant,
               otherwise Sub32 of its negation. */
            bb->stmts[i] 
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0) 
                      ? IRExpr_Binop(Iop_Add32, 
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ? */
      /* NOTE(review): |details| is fetched from the union before the
         tag is checked; it is only dereferenced after the
         "st->tag == Ist_PutI" test below short-circuits, so no wild
         pointer is ever followed. */
      IRPutI *puti = st->Ist.PutI.details;
      if (st->tag == Ist_PutI
          && puti->ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                           &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing2 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         con2 += puti->bias;
         bb->stmts[i]
            = IRStmt_PutI(mkIRPutI(puti->descr,
                                   IRExpr_RdTmp(var2),
                                   con2,
                                   puti->data));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

   } /* for */
}


/*---------------------------------------------------------------*/
/*--- PutI/GetI transformations                               ---*/
/*---------------------------------------------------------------*/

/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   the given starting point to find, if any, a PutI which writes
   exactly the same piece of guest state, and so return the expression
   that the PutI writes.  This is the core of PutI-GetI forwarding. */

static 
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp) 
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state.
*/ 4244 4245 relation 4246 = getAliasingRelation_IC( 4247 descrG, ixG, 4248 st->Ist.Put.offset, 4249 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 4250 4251 if (relation == NoAlias) { 4252 /* we're OK; keep going */ 4253 continue; 4254 } else { 4255 /* relation == UnknownAlias || relation == ExactAlias */ 4256 /* If this assertion fails, we've found a Put which writes 4257 an area of guest state which is read by a GetI. Which 4258 is unlikely (although not per se wrong). */ 4259 vassert(relation != ExactAlias); 4260 /* This Put potentially writes guest state that the GetI 4261 reads; we must fail. */ 4262 return NULL; 4263 } 4264 } 4265 4266 if (st->tag == Ist_PutI) { 4267 IRPutI *puti = st->Ist.PutI.details; 4268 4269 relation = getAliasingRelation_II( 4270 descrG, ixG, biasG, 4271 puti->descr, 4272 puti->ix, 4273 puti->bias 4274 ); 4275 4276 if (relation == NoAlias) { 4277 /* This PutI definitely doesn't overlap. Ignore it and 4278 keep going. */ 4279 continue; /* the for j loop */ 4280 } 4281 4282 if (relation == UnknownAlias) { 4283 /* We don't know if this PutI writes to the same guest 4284 state that the GetI, or not. So we have to give up. */ 4285 return NULL; 4286 } 4287 4288 /* Otherwise, we've found what we're looking for. */ 4289 vassert(relation == ExactAlias); 4290 return puti->data; 4291 4292 } /* if (st->tag == Ist_PutI) */ 4293 4294 if (st->tag == Ist_Dirty) { 4295 /* Be conservative. If the dirty call has any guest effects at 4296 all, give up. We could do better -- only give up if there 4297 are any guest writes/modifies. */ 4298 if (st->Ist.Dirty.details->nFxState > 0) 4299 return NULL; 4300 } 4301 4302 } /* for */ 4303 4304 /* No valid replacement was found. */ 4305 return NULL; 4306 } 4307 4308 4309 4310 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense 4311 that it writes exactly the same piece of guest state) ? Safe 4312 answer: False. 
*/

/* True iff s2 is a PutI writing exactly the same guest-state slice as
   pi, as established by an ExactAlias verdict from the alias
   analyser.  Anything weaker (including "don't know") yields False. */
static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
{
   vassert(pi->tag == Ist_PutI);
   if (s2->tag != Ist_PutI)
      return False;

   IRPutI *p1 = pi->Ist.PutI.details;
   IRPutI *p2 = s2->Ist.PutI.details;

   return toBool(
          getAliasingRelation_II(
             p1->descr, p1->ix, p1->bias,
             p2->descr, p2->ix, p2->bias
          )
          == ExactAlias
          );
}


/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   overlap it?  Safe answer: True.  Note, we could do a lot better
   than this if needed. */

static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed here but not used
      anywhere below -- presumably vestigial; confirm before removal. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* These touch no guest state at all. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only a Get/GetI on the rhs reads guest state; any other
            rhs is irrelevant to this PutI. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Stores touch memory, not guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}



/* ---------- PutI/GetI transformations main functions --------- */

/* Remove redundant GetIs, to the extent that they can be detected.
   bb is modified in-place.
*/ 4443 4444 static 4445 void do_redundant_GetI_elimination ( IRSB* bb ) 4446 { 4447 Int i; 4448 IRStmt* st; 4449 4450 for (i = bb->stmts_used-1; i >= 0; i--) { 4451 st = bb->stmts[i]; 4452 if (st->tag == Ist_NoOp) 4453 continue; 4454 4455 if (st->tag == Ist_WrTmp 4456 && st->Ist.WrTmp.data->tag == Iex_GetI 4457 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) { 4458 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr; 4459 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix; 4460 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias; 4461 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias); 4462 if (replacement 4463 && isIRAtom(replacement) 4464 /* Make sure we're doing a type-safe transformation! */ 4465 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) { 4466 if (DEBUG_IROPT) { 4467 vex_printf("rGI: "); 4468 ppIRExpr(st->Ist.WrTmp.data); 4469 vex_printf(" -> "); 4470 ppIRExpr(replacement); 4471 vex_printf("\n"); 4472 } 4473 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement); 4474 } 4475 } 4476 } 4477 4478 } 4479 4480 4481 /* Remove redundant PutIs, to the extent which they can be detected. 4482 bb is modified in-place. */ 4483 4484 static 4485 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl ) 4486 { 4487 Int i, j; 4488 Bool delete; 4489 IRStmt *st, *stj; 4490 4491 vassert(pxControl < VexRegUpdAllregsAtEachInsn); 4492 4493 for (i = 0; i < bb->stmts_used; i++) { 4494 st = bb->stmts[i]; 4495 if (st->tag != Ist_PutI) 4496 continue; 4497 /* Ok, search forwards from here to see if we can find another 4498 PutI which makes this one redundant, and dodging various 4499 hazards. Search forwards: 4500 * If conditional exit, give up (because anything after that 4501 does not postdominate this put). 4502 * If a Get which might overlap, give up (because this PutI 4503 not necessarily dead). 4504 * If a Put which is identical, stop with success. 4505 * If a Put which might overlap, but is not identical, give up. 
4506 * If a dirty helper call which might write guest state, give up. 4507 * If a Put which definitely doesn't overlap, or any other 4508 kind of stmt, continue. 4509 */ 4510 delete = False; 4511 for (j = i+1; j < bb->stmts_used; j++) { 4512 stj = bb->stmts[j]; 4513 if (stj->tag == Ist_NoOp) 4514 continue; 4515 if (identicalPutIs(st, stj)) { 4516 /* success! */ 4517 delete = True; 4518 break; 4519 } 4520 if (stj->tag == Ist_Exit) 4521 /* give up */ 4522 break; 4523 if (st->tag == Ist_Dirty) 4524 /* give up; could do better here */ 4525 break; 4526 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj)) 4527 /* give up */ 4528 break; 4529 } 4530 4531 if (delete) { 4532 if (DEBUG_IROPT) { 4533 vex_printf("rPI: "); 4534 ppIRStmt(st); 4535 vex_printf("\n"); 4536 } 4537 bb->stmts[i] = IRStmt_NoOp(); 4538 } 4539 4540 } 4541 } 4542 4543 4544 /*---------------------------------------------------------------*/ 4545 /*--- Loop unrolling ---*/ 4546 /*---------------------------------------------------------------*/ 4547 4548 /* Adjust all tmp values (names) in e by delta. e is destructively 4549 modified. 
*/ 4550 4551 static void deltaIRExpr ( IRExpr* e, Int delta ) 4552 { 4553 Int i; 4554 switch (e->tag) { 4555 case Iex_RdTmp: 4556 e->Iex.RdTmp.tmp += delta; 4557 break; 4558 case Iex_Get: 4559 case Iex_Const: 4560 break; 4561 case Iex_GetI: 4562 deltaIRExpr(e->Iex.GetI.ix, delta); 4563 break; 4564 case Iex_Qop: 4565 deltaIRExpr(e->Iex.Qop.details->arg1, delta); 4566 deltaIRExpr(e->Iex.Qop.details->arg2, delta); 4567 deltaIRExpr(e->Iex.Qop.details->arg3, delta); 4568 deltaIRExpr(e->Iex.Qop.details->arg4, delta); 4569 break; 4570 case Iex_Triop: 4571 deltaIRExpr(e->Iex.Triop.details->arg1, delta); 4572 deltaIRExpr(e->Iex.Triop.details->arg2, delta); 4573 deltaIRExpr(e->Iex.Triop.details->arg3, delta); 4574 break; 4575 case Iex_Binop: 4576 deltaIRExpr(e->Iex.Binop.arg1, delta); 4577 deltaIRExpr(e->Iex.Binop.arg2, delta); 4578 break; 4579 case Iex_Unop: 4580 deltaIRExpr(e->Iex.Unop.arg, delta); 4581 break; 4582 case Iex_Load: 4583 deltaIRExpr(e->Iex.Load.addr, delta); 4584 break; 4585 case Iex_CCall: 4586 for (i = 0; e->Iex.CCall.args[i]; i++) 4587 deltaIRExpr(e->Iex.CCall.args[i], delta); 4588 break; 4589 case Iex_ITE: 4590 deltaIRExpr(e->Iex.ITE.cond, delta); 4591 deltaIRExpr(e->Iex.ITE.iftrue, delta); 4592 deltaIRExpr(e->Iex.ITE.iffalse, delta); 4593 break; 4594 default: 4595 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4596 vpanic("deltaIRExpr"); 4597 } 4598 } 4599 4600 /* Adjust all tmp values (names) in st by delta. st is destructively 4601 modified. 
*/

/* Adjust every tmp name mentioned by st (both definitions and uses)
   by delta.  st is destructively modified.  Used by the loop unroller
   when relocating a cloned block's statements into a fresh region of
   the tmp namespace. */
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* The lhs tmp is a definition; rename it too. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi/expdHi/dataHi are only present for double CAS. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR marker args carry no tmp names. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}


/* If possible, return a loop-unrolled version of bb0.  The original
   is changed.  If not possible, return NULL. */

/* The two schemas considered are:

     X: BODY; goto X

     which unrolls to (eg)  X: BODY;BODY; goto X

   and

     X: BODY; if (c) goto X; goto Y
     which trivially transforms to
     X: BODY; if (!c) goto Y; goto X;
     so it falls in the scope of the first case.

   X and Y must be literal (guest) addresses.
*/

/* Pick an unroll factor (8, 4, 2, or 1 == don't unroll) based on the
   number of real (non-NoOp) statements in bb relative to
   vex_control.iropt_unroll_thresh. */
static Int calc_unroll_factor( IRSB* bb )
{
   Int n_stmts, i;

   n_stmts = 0;
   for (i = 0; i < bb->stmts_used; i++) {
      if (bb->stmts[i]->tag != Ist_NoOp)
         n_stmts++;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 8* n_stmts);
      return 8;
   }
   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 4* n_stmts);
      return 4;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 2* n_stmts);
      return 2;
   }

   if (vex_control.iropt_verbosity > 0)
      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);

   return 1;
}


static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ? udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-NULL stmt slot. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass doubles the body: 1 doubling for x2, 2 for x4,
      3 for x8. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      /* Extend bb1's tmp namespace with a fresh copy of bb2's types,
         so bb2's stmts can be renamed (by +n_vars) into it. */
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach. */
   return flatten_BB(bb1);
}


/*---------------------------------------------------------------*/
/*--- The tree builder                                        ---*/
/*---------------------------------------------------------------*/

/* This isn't part of IR optimisation.  Really it's a pass done prior
   to instruction selection, which improves the code that the
   instruction selector can produce. */

/* --- The 'tmp' environment is the central data structure here --- */

/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  8 gives reasonable performance
   under most circumstances. */
#define A_NENV 10

/* An interval. Used to record the bytes in the guest state accessed
   by a Put[I] statement or by (one or more) Get[I] expression(s). In
   case of several Get[I] expressions, the lower/upper bounds are recorded.
   This is conservative but cheap.
   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   E.g.
an expression that reads 8 bytes at offset 100 and 4 bytes at 4938 offset 200 would be recorded as [100,203] */ 4939 typedef 4940 struct { 4941 Bool present; 4942 Int low; 4943 Int high; 4944 } 4945 Interval; 4946 4947 static inline Bool 4948 intervals_overlap(Interval i1, Interval i2) 4949 { 4950 return (i1.low >= i2.low && i1.low <= i2.high) || 4951 (i2.low >= i1.low && i2.low <= i1.high); 4952 } 4953 4954 static inline void 4955 update_interval(Interval *i, Int low, Int high) 4956 { 4957 vassert(low <= high); 4958 4959 if (i->present) { 4960 if (low < i->low) i->low = low; 4961 if (high > i->high) i->high = high; 4962 } else { 4963 i->present = True; 4964 i->low = low; 4965 i->high = high; 4966 } 4967 } 4968 4969 4970 /* bindee == NULL === slot is not in use 4971 bindee != NULL === slot is in use 4972 */ 4973 typedef 4974 struct { 4975 IRTemp binder; 4976 IRExpr* bindee; 4977 Bool doesLoad; 4978 /* Record the bytes of the guest state BINDEE reads from. */ 4979 Interval getInterval; 4980 } 4981 ATmpInfo; 4982 4983 __attribute__((unused)) 4984 static void ppAEnv ( ATmpInfo* env ) 4985 { 4986 Int i; 4987 for (i = 0; i < A_NENV; i++) { 4988 vex_printf("%d tmp %d val ", i, (Int)env[i].binder); 4989 if (env[i].bindee) 4990 ppIRExpr(env[i].bindee); 4991 else 4992 vex_printf("(null)"); 4993 vex_printf("\n"); 4994 } 4995 } 4996 4997 /* --- Tree-traversal fns --- */ 4998 4999 /* Traverse an expr, and detect if any part of it reads memory or does 5000 a Get. Be careful ... this really controls how much the 5001 tree-builder can reorder the code, so getting it right is critical. 
*/
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         /* The index is not known here, so conservatively record the
            whole array's extent as read. */
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}


/* Add a binding to the front of the env and slide all the rest
   backwards.  It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int i;
   vassert(env[A_NENV-1].bindee == NULL);
   for (i = A_NENV-1; i >= 1; i--)
      env[i] = env[i-1];
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False; /* filled in later */
   env[0].getInterval.present  = False; /* filled in later */
   env[0].getInterval.low  = -1; /* filled in later */
   env[0].getInterval.high = -1; /* filled in later */
}

/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this expression
   to the env.
*/
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
   }
}


/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this statement
   to the env.
*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         /* Hi halves are only present for double CAS. */
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR marker args mention no tmps. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}

/* Look up a binding for tmp in the env.  If found, return the bound
   expression, and set the env's binding to NULL so it is marked as
   used.  If not found, return NULL. */

static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
{
   Int i;
   for (i = 0; i < A_NENV; i++) {
      if (env[i].binder == tmp && env[i].bindee != NULL) {
         IRExpr* bindee = env[i].bindee;
         /* Bindings are single-shot: consume it. */
         env[i].bindee = NULL;
         return bindee;
      }
   }
   return NULL;
}

/* Traverse e, looking for temps.  For each observed temp, see if env
   contains a binding for the temp, and if so return the bound value.
   The env has the property that any binding it holds is
   'single-shot', so once a binding is used, it is marked as no longer
   available, by setting its .bindee field to NULL.
*/ 5248 5249 static inline Bool is_Unop ( IRExpr* e, IROp op ) { 5250 return e->tag == Iex_Unop && e->Iex.Unop.op == op; 5251 } 5252 static inline Bool is_Binop ( IRExpr* e, IROp op ) { 5253 return e->tag == Iex_Binop && e->Iex.Binop.op == op; 5254 } 5255 5256 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 5257 { 5258 switch (op) { 5259 case Iop_Or32: 5260 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ 5261 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) 5262 return IRExpr_Unop( Iop_CmpwNEZ32, 5263 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 5264 a2->Iex.Unop.arg ) ); 5265 break; 5266 5267 case Iop_CmpNE32: 5268 /* Since X has type Ity_I1 we can simplify: 5269 CmpNE32(1Uto32(X),0)) ==> X */ 5270 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2)) 5271 return a1->Iex.Unop.arg; 5272 break; 5273 5274 default: 5275 break; 5276 } 5277 /* no reduction rule applies */ 5278 return IRExpr_Binop( op, a1, a2 ); 5279 } 5280 5281 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) 5282 { 5283 switch (op) { 5284 case Iop_CmpwNEZ64: 5285 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */ 5286 if (is_Unop(aa, Iop_CmpwNEZ64)) 5287 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg ); 5288 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5289 if (is_Binop(aa, Iop_Or64) 5290 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) 5291 return fold_IRExpr_Unop( 5292 Iop_CmpwNEZ64, 5293 IRExpr_Binop(Iop_Or64, 5294 aa->Iex.Binop.arg1->Iex.Unop.arg, 5295 aa->Iex.Binop.arg2)); 5296 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5297 if (is_Binop(aa, Iop_Or64) 5298 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) 5299 return fold_IRExpr_Unop( 5300 Iop_CmpwNEZ64, 5301 IRExpr_Binop(Iop_Or64, 5302 aa->Iex.Binop.arg1, 5303 aa->Iex.Binop.arg2->Iex.Unop.arg)); 5304 break; 5305 case Iop_CmpNEZ64: 5306 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ 5307 if (is_Unop(aa, Iop_Left64)) 5308 return 
IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); 5309 /* CmpNEZ64( 1Uto64(X) ) --> X */ 5310 if (is_Unop(aa, Iop_1Uto64)) 5311 return aa->Iex.Unop.arg; 5312 break; 5313 case Iop_CmpwNEZ32: 5314 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ 5315 if (is_Unop(aa, Iop_CmpwNEZ32)) 5316 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); 5317 break; 5318 case Iop_CmpNEZ32: 5319 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ 5320 if (is_Unop(aa, Iop_Left32)) 5321 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); 5322 /* CmpNEZ32( 1Uto32(X) ) --> X */ 5323 if (is_Unop(aa, Iop_1Uto32)) 5324 return aa->Iex.Unop.arg; 5325 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */ 5326 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64)) 5327 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg); 5328 break; 5329 case Iop_CmpNEZ8: 5330 /* CmpNEZ8( 1Uto8(X) ) --> X */ 5331 if (is_Unop(aa, Iop_1Uto8)) 5332 return aa->Iex.Unop.arg; 5333 break; 5334 case Iop_Left32: 5335 /* Left32( Left32(x) ) --> Left32(x) */ 5336 if (is_Unop(aa, Iop_Left32)) 5337 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); 5338 break; 5339 case Iop_Left64: 5340 /* Left64( Left64(x) ) --> Left64(x) */ 5341 if (is_Unop(aa, Iop_Left64)) 5342 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg ); 5343 break; 5344 case Iop_ZeroHI64ofV128: 5345 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */ 5346 if (is_Unop(aa, Iop_ZeroHI64ofV128)) 5347 return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg ); 5348 break; 5349 case Iop_32to1: 5350 /* 32to1( 1Uto32 ( x ) ) --> x */ 5351 if (is_Unop(aa, Iop_1Uto32)) 5352 return aa->Iex.Unop.arg; 5353 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ 5354 if (is_Unop(aa, Iop_CmpwNEZ32)) 5355 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg ); 5356 break; 5357 case Iop_64to1: 5358 /* 64to1( 1Uto64 ( x ) ) --> x */ 5359 if (is_Unop(aa, Iop_1Uto64)) 5360 return aa->Iex.Unop.arg; 5361 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ 5362 
if (is_Unop(aa, Iop_CmpwNEZ64)) 5363 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); 5364 break; 5365 case Iop_64to32: 5366 /* 64to32( 32Uto64 ( x )) --> x */ 5367 if (is_Unop(aa, Iop_32Uto64)) 5368 return aa->Iex.Unop.arg; 5369 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */ 5370 if (is_Unop(aa, Iop_8Uto64)) 5371 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg); 5372 break; 5373 5374 case Iop_32Uto64: 5375 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */ 5376 if (is_Unop(aa, Iop_8Uto32)) 5377 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg); 5378 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */ 5379 if (is_Unop(aa, Iop_16Uto32)) 5380 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg); 5381 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )) 5382 --> Shr64( 32Uto64(64to32(x)), sh )) */ 5383 if (is_Unop(aa, Iop_64to32) 5384 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64) 5385 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5386 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5387 Iop_64to32)) { 5388 return aa->Iex.Unop.arg; 5389 } 5390 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )) 5391 --> 32Uto64(64to32( Shl64( x, sh )) */ 5392 if (is_Unop(aa, Iop_64to32) 5393 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64) 5394 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5395 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5396 Iop_64to32)) { 5397 return 5398 IRExpr_Unop( 5399 Iop_32Uto64, 5400 IRExpr_Unop( 5401 Iop_64to32, 5402 IRExpr_Binop( 5403 Iop_Shl64, 5404 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg, 5405 aa->Iex.Unop.arg->Iex.Binop.arg2 5406 ))); 5407 } 5408 break; 5409 5410 case Iop_1Sto32: 5411 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */ 5412 if (is_Unop(aa, Iop_CmpNEZ8) 5413 && is_Unop(aa->Iex.Unop.arg, Iop_32to8) 5414 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32) 5415 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg, 5416 Iop_CmpNEZ32)) { 5417 return IRExpr_Unop( Iop_CmpwNEZ32, 5418 
aa->Iex.Unop.arg->Iex.Unop.arg
                             ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}

/* Substitute the bindings held in |env| into expression |e|,
   rebuilding it bottom-up.  Iex_RdTmp nodes are replaced by their
   bound expression, if any; rebuilt Unop and Binop nodes are routed
   through fold_IRExpr_Unop/fold_IRExpr_Binop so that folding
   opportunities created by the substitution are taken immediately. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Shallow-copy the args vector so the original tree is not
            mutated. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* NULL from atbSubst_Temp means "no binding for this tmp";
            keep the original node in that case. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         /* Note: folded, not just rebuilt. */
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         /* Note: folded, not just rebuilt. */
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* Leaf nodes: nothing to substitute into. */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}

/* Same deal as atbSubst_Expr, except for stmts. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* expdHi/dataHi may legitimately be NULL (single-width CAS). */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Copy the details struct, then substitute into the copy's
            mAddr (if any), guard and non-magic args. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}

/* True iff the dirty helper's declared memory effect says it may
   write memory (Ifx_Write or Ifx_Modify). */
inline
static Bool dirty_helper_stores ( const IRDirty *d )
{
   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
}

/* Return the interval of guest state offsets that dirty helper |d|
   may modify, and via |requiresPreciseMemExns| whether any modified
   part is one that preciseMemExnsFn says needs precise memory
   exceptions. */
inline
static Interval dirty_helper_puts (
                   const IRDirty *d,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Int i;
   Interval interval;

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.low  = 0;
         interval.high = 0x7FFFFFFF;
         return interval;
      }
   }

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         Int offset = d->fxState[i].offset;
         Int size = d->fxState[i].size;
         Int nRepeats = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         /* The span below covers all nRepeats repetitions of this
            effect, not just the first. */
         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1,
                              pxControl)) {
            *requiresPreciseMemExns = True;
         }
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
      }
   }

   return interval;
}

/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Interval interval;

   switch (st->tag) {
   case Ist_Put: {
      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;
      return interval;
   }

   case Ist_PutI: {
      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and there
         are no holes.  This is to avoid a loop.
The assumption is conservative
         in the sense that we might report that precise memory exceptions are
         needed when in fact they are not. */
      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                            pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;
      return interval;
   }

   case Ist_Dirty:
      return dirty_helper_puts(st->Ist.Dirty.details,
                               preciseMemExnsFn, pxControl,
                               requiresPreciseMemExns);

   default:
      /* Anything else is considered not to modify the guest state. */
      *requiresPreciseMemExns = False;
      interval.present = False;
      interval.low = -1;
      interval.high = -1;
      return interval;
   }
}

/* Build expression trees in bb, in place: a binding 't = E' where t is
   used exactly once is deleted and E substituted into the place where
   t was used, subject to the invalidation rules enforced in phase 2
   below.  Returns the highest guest address covered by any IMark in
   the block, or ~(Addr)0 if the block contains no IMarks. */
/* notstatic */ Addr ado_treebuild_BB (
                   IRSB* bb,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl
                )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr   max_ga       = 0;

   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* Guard against a zero-length IMark when computing the
               last address the insn covers. */
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
            (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
                by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt.
*/
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      /* Walk env from the oldest binding (highest index) to the
         youngest, dumping any binding this stmt invalidates. */
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essence, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requires precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      /* (m carries over from the loop above: clear the tail slots) */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}


/*---------------------------------------------------------------*/
/*--- MSVC specific transformation hacks                      ---*/
/*---------------------------------------------------------------*/

/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.

   This isn't a problem for the simple case "x ^ x", because iropt
   folds it to a constant zero before Memcheck ever sees it.  But in
   this case we have an intervening "& c" which defeats the simple
   case.
So we have to carefully inspect all expressions rooted at an
   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   7 other variants resulting from swapping the order of arguments to
   the three binary operations.  If we get a match, we then replace
   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.

   The key difficulty is to spot the two uses of "a".  To normalise
   the IR to maximise the chances of success, we first do a CSE pass,
   with CSEing of loads enabled, since the two "a" expressions may be
   loads, which need to be commoned up.  Then we do a constant folding
   pass, so as to remove any tmp-to-tmp assignment chains that would
   make matching the original expression more difficult.
*/

/* Helper function for debug printing */
__attribute__((unused))
static void print_flat_expr ( IRExpr** env, IRExpr* e )
{
   if (e == NULL) {
      vex_printf("?");
      return;
   }
   switch (e->tag) {
      case Iex_Binop: {
         ppIROp(e->Iex.Binop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Binop.arg1);
         vex_printf(",");
         print_flat_expr(env, e->Iex.Binop.arg2);
         vex_printf(")");
         break;
      }
      case Iex_Unop: {
         ppIROp(e->Iex.Unop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Unop.arg);
         vex_printf(")");
         break;
      }
      case Iex_RdTmp:
         /* Print the tmp followed by the expression it is bound to
            in |env| (chased through tmp-to-tmp copies). */
         ppIRTemp(e->Iex.RdTmp.tmp);
         vex_printf("=");
         print_flat_expr(env, chase(env, e));
         break;
      case Iex_Const:
      case Iex_CCall:
      case Iex_Load:
      case Iex_ITE:
      case Iex_Get:
         ppIRExpr(e);
         break;
      default:
         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
         vassert(0);
   }
}

/* Spot   a ^ ((a ^ b) & c)  for a,b and c tmp-or-const (atoms)
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &.
   Returns the variant number (1 .. 8) on a match, writing the three
   atoms to *aa, *bb and *cc; returns 0 when no variant matches. */
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                         && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------        */
   /*                         --xor---             */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c)       */
   /* find variant 2:   a1 ^ ((b ^ a2) & c)       */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /* -----and------                              */
   /*  --xor---                                   */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1       */
   /* find variant 4:   ((b ^ a2) & c) ^ a1       */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------        */
   /*                         --xor---             */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2))       */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      /* NOTE(review): vassert(5-5) is vassert(0) -- this path looks
         deliberately trapped as never-yet-seen ("ATC"); it will
         assert if ever taken. */
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      vassert(6-6); // ATC
      return 6;
   }

  v78:
   /* -----and------                              */
   /*  --xor---                                   */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1       */
   /* find variant 8:   (c & (b ^ a2)) ^ a1       */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

  fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}

/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   variants thereof generated by switching arguments around), return
   the IRExpr* for (a & ~c) | (b & c).  Else return NULL.
*/
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   /* Only the integer XORs of width 8/16/32 are handled; pick the
      matching OR/AND/NOT ops for the rebuild. */
   const HChar* tyNm = NULL;
   IROp   opOR  = Iop_INVALID;
   IROp   opAND = Iop_INVALID;
   IROp   opNOT = Iop_INVALID;
   IROp   opXOR = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         opOR  = Iop_Or8;   opAND = Iop_And8;
         opNOT = Iop_Not8;  opXOR = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* aa = NULL;
   IRExpr* bb = NULL;
   IRExpr* cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   if (variant > 0) {
      static UInt ctr = 0;
      if (0)
         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                    "type %s, variant %u\n",
                    ++ctr, tyNm, variant);
      /* It's vitally important that the returned aa, bb and cc are
         atoms -- either constants or tmps.  If it's anything else
         (eg, a GET) then incorporating them in a tree at this point
         in the SB may erroneously pull them forwards (eg of a PUT
         that originally was after the GET) and so transform the IR
         wrongly.  spotBitfieldAssignment should guarantee only to
         give us atoms, but we check here anyway. */
      vassert(aa && isIRAtom(aa));
      vassert(bb && isIRAtom(bb));
      vassert(cc && isIRAtom(cc));
      /* Build (aa & ~cc) | (bb & cc). */
      return IRExpr_Binop(
                opOR,
                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
                IRExpr_Binop(opAND, bb, cc)
             );
   }
   return NULL;
}


/* SB is modified in-place.  Visit all the IRExprs and, for those
   which are allowed to be non-atomic, perform the XOR transform if
   possible.  This makes |sb| be non-flat, but that's ok, the caller
   can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int  i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   /* Flatness is asserted for every other expression position; only
      WrTmp data is actually transformed. */
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data)
               is allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            vpanic("do_XOR_TRANSFORMS_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}


static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   // Visit all atoms, do the transformation proper.  bb is modified
   // in-place.
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

   if (changed) {
      // The transformation generates non-flat expressions, so we now
      // need to re-flatten the block.
      sb = flatten_BB(sb);
   }

   return sb;
}


/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/


static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   /* Put removal is skipped when every register must be up to date at
      each instruction. */
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   /* Specialisation can expose more dead code; remove it again. */
   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
      ppIRSB(bb);
   }

   return bb;
}


/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI.
*/

static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   /* CSE result deliberately ignored here; the pass mutates bb. */
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   do_deadcode_BB( bb );
   return bb;
}


/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* While scanning, also assert flatness of every expression
      position, as the doc comment above requires a flattened BB. */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  /* Decimal float temps count as FP activity too. */
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/


/* Optimise the flat-or-not block 'bb0' and return the block to use in
   its place (possibly 'bb0' itself after flattening).

   specHelper       - architecture-specific clean-helper specialiser,
                      passed through to the cheap pipeline
   preciseMemExnsFn - says which guest state ranges need precise
                      exceptions, for redundant-PUT removal
   pxControl        - register-update precision mode; gates the
                      PUT/PutI removal passes throughout
   guest_addr       - guest address of the block, used by the loop
                      unroller
   guest_arch       - guest architecture; ARM gets an extra cleanup
                      round (see below)

   The amount of work done is controlled by vex_control.iropt_level:
   level 0 only flattens, level 1 adds the cheap passes, level 2 adds
   the conditional expensive passes and loop unrolling. */
IRSB* do_iropt_BB(
      IRSB* bb0,
      IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
      Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
      VexRegisterUpdates pxControl,
      Addr guest_addr,
      VexArch guest_arch
   )
{
   /* Counters for the DEBUG_IROPT statistics printout below; shared
      across all calls. */
   static Int n_total = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         /* Only re-run the cheap passes if CSE actually found
            something to common up. */
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      /* maybe_loop_unroll_BB returns NULL when it declines to
         unroll; in that case 'bb' is left as-is. */
      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                            ir_opt.c ---*/
/*---------------------------------------------------------------*/