1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 3 /*---------------------------------------------------------------*/ 4 /*--- begin ir_opt.c ---*/ 5 /*---------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2004-2013 OpenWorks LLP 12 info (at) open-works.net 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 27 02110-1301, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 31 Neither the names of the U.S. Department of Energy nor the 32 University of California nor the names of its contributors may be 33 used to endorse or promote products derived from this software 34 without prior written permission. 35 */ 36 37 #include "libvex_basictypes.h" 38 #include "libvex_ir.h" 39 #include "libvex.h" 40 41 #include "main_util.h" 42 #include "main_globals.h" 43 #include "ir_opt.h" 44 45 46 /* Set to 1 for lots of debugging output. */ 47 #define DEBUG_IROPT 0 48 49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */ 50 #define STATS_IROPT 0 51 52 53 /* What iropt does, 29 Dec 04. 54 55 It takes an IRSB and produces a new one with the same meaning, 56 defined thus: 57 58 After execution of the new BB, all guest state and guest memory is 59 the same as after execution of the original. This is true 60 regardless of how the block was exited (at the end vs side exit). 61 62 In addition, parts of the guest state will be identical to that 63 created by execution of the original at the following observation 64 points: 65 66 * In a dirty helper call, any parts of the guest state that the 67 helper states that it reads or modifies will be up to date. 68 Also, guest memory will be up to date. Parts of the guest state 69 not marked as being read or modified by the helper cannot be 70 assumed to be up-to-date at the point where the helper is called. 71 72 * If iropt_register_updates == VexRegUpdSpAtMemAccess : 73 The guest state is only up to date only as explained above 74 (i.e. at SB exits and as specified by dirty helper call). 75 Also, the stack pointer register is up to date at memory 76 exception points (as this is needed for the stack extension 77 logic in m_signals.c). 78 79 * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess : 80 Immediately prior to any load or store, those parts of the guest 81 state marked as requiring precise exceptions will be up to date. 82 Also, guest memory will be up to date. Parts of the guest state 83 not marked as requiring precise exceptions cannot be assumed to 84 be up-to-date at the point of the load/store. 85 86 * If iropt_register_updates == VexRegUpdAllregsAtMemAccess: 87 Same as minimal, but all the guest state is up to date at memory 88 exception points. 89 90 * If iropt_register_updates == VexRegUpdAllregsAtEachInsn : 91 Guest state is up to date at each instruction. 92 93 The relative order of loads and stores (including loads/stores of 94 guest memory done by dirty helpers annotated as such) is not 95 changed. However, the relative order of loads with no intervening 96 stores/modifies may be changed. 97 98 Transformation order 99 ~~~~~~~~~~~~~~~~~~~~ 100 101 There are three levels of optimisation, controlled by 102 vex_control.iropt_level. Define first: 103 104 "Cheap transformations" are the following sequence: 105 * Redundant-Get removal 106 * Redundant-Put removal 107 * Constant propagation/folding 108 * Dead code removal 109 * Specialisation of clean helper functions 110 * Dead code removal 111 112 "Expensive transformations" are the following sequence: 113 * CSE 114 * Folding of add/sub chains 115 * Redundant-GetI removal 116 * Redundant-PutI removal 117 * Dead code removal 118 119 Then the transformations are as follows, as defined by 120 vex_control.iropt_level: 121 122 Level 0: 123 * Flatten into atomic form. 124 125 Level 1: the following sequence: 126 * Flatten into atomic form. 127 * Cheap transformations. 128 129 Level 2: the following sequence 130 * Flatten into atomic form. 131 * Cheap transformations. 132 * If block contains any floating or vector types, CSE. 133 * If block contains GetI or PutI, Expensive transformations. 134 * Try unrolling loops. Three possible outcomes: 135 - No effect: do nothing more. 136 - Unrolled a loop, and block does not contain GetI or PutI: 137 Do: * CSE 138 * Dead code removal 139 - Unrolled a loop, and block contains GetI or PutI: 140 Do: * Expensive transformations 141 * Cheap transformations 142 */ 143 144 /* Implementation notes, 29 Dec 04. 145 146 TODO (important): I think rPutI removal ignores precise exceptions 147 and is therefore in a sense, wrong. In the sense that PutIs are 148 assumed not to write parts of the guest state that we need to have 149 up-to-date at loads/stores. So far on x86 guest that has not 150 mattered since indeed only the x87 FP registers and tags are 151 accessed using GetI/PutI, and there is no need so far for them to 152 be up to date at mem exception points. The rPutI pass should be 153 fixed. 154 155 TODO: improve pessimistic handling of precise exceptions 156 in the tree builder. 157 158 TODO: check interaction of rGetI and dirty helpers. 159 160 F64i constants are treated differently from other constants. 161 They are not regarded as atoms, and instead lifted off and 162 bound to temps. This allows them to participate in CSE, which 163 is important for getting good performance for x86 guest code. 164 165 CSE up F64 literals (already doing F64is) 166 167 CSE: consider carefully the requirement for precise exns 168 prior to making CSE any more aggressive. */ 169 170 171 /*---------------------------------------------------------------*/ 172 /*--- Finite mappery, of a sort ---*/ 173 /*---------------------------------------------------------------*/ 174 175 /* General map from HWord-sized thing HWord-sized thing. Could be by 176 hashing, but it's not clear whether or not this would really be any 177 faster. */ 178 179 typedef 180 struct { 181 Bool* inuse; 182 HWord* key; 183 HWord* val; 184 Int size; 185 Int used; 186 } 187 HashHW; 188 189 static HashHW* newHHW ( void ) 190 { 191 HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW)); 192 h->size = 8; 193 h->used = 0; 194 h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool)); 195 h->key = LibVEX_Alloc_inline(h->size * sizeof(HWord)); 196 h->val = LibVEX_Alloc_inline(h->size * sizeof(HWord)); 197 return h; 198 } 199 200 201 /* Look up key in the map. */ 202 203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key ) 204 { 205 Int i; 206 /* vex_printf("lookupHHW(%llx)\n", key ); */ 207 for (i = 0; i < h->used; i++) { 208 if (h->inuse[i] && h->key[i] == key) { 209 if (val) 210 *val = h->val[i]; 211 return True; 212 } 213 } 214 return False; 215 } 216 217 218 /* Add key->val to the map. Replaces any existing binding for key. */ 219 220 static void addToHHW ( HashHW* h, HWord key, HWord val ) 221 { 222 Int i, j; 223 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */ 224 225 /* Find and replace existing binding, if any. */ 226 for (i = 0; i < h->used; i++) { 227 if (h->inuse[i] && h->key[i] == key) { 228 h->val[i] = val; 229 return; 230 } 231 } 232 233 /* Ensure a space is available. */ 234 if (h->used == h->size) { 235 /* Copy into arrays twice the size. */ 236 Bool* inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool)); 237 HWord* key2 = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord)); 238 HWord* val2 = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord)); 239 for (i = j = 0; i < h->size; i++) { 240 if (!h->inuse[i]) continue; 241 inuse2[j] = True; 242 key2[j] = h->key[i]; 243 val2[j] = h->val[i]; 244 j++; 245 } 246 h->used = j; 247 h->size *= 2; 248 h->inuse = inuse2; 249 h->key = key2; 250 h->val = val2; 251 } 252 253 /* Finally, add it. */ 254 vassert(h->used < h->size); 255 h->inuse[h->used] = True; 256 h->key[h->used] = key; 257 h->val[h->used] = val; 258 h->used++; 259 } 260 261 262 /*---------------------------------------------------------------*/ 263 /*--- Flattening out a BB into atomic SSA form ---*/ 264 /*---------------------------------------------------------------*/ 265 266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp 267 copies made by flattening. If in doubt return False. */ 268 269 static Bool isFlat ( IRExpr* e ) 270 { 271 if (e->tag == Iex_Get) 272 return True; 273 if (e->tag == Iex_Binop) 274 return toBool( isIRAtom(e->Iex.Binop.arg1) 275 && isIRAtom(e->Iex.Binop.arg2) ); 276 if (e->tag == Iex_Load) 277 return isIRAtom(e->Iex.Load.addr); 278 return False; 279 } 280 281 /* Flatten out 'ex' so it is atomic, returning a new expression with 282 the same value, after having appended extra IRTemp assignments to 283 the end of 'bb'. */ 284 285 static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex ) 286 { 287 Int i; 288 IRExpr** newargs; 289 IRType ty = typeOfIRExpr(bb->tyenv, ex); 290 IRTemp t1; 291 292 switch (ex->tag) { 293 294 case Iex_GetI: 295 t1 = newIRTemp(bb->tyenv, ty); 296 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 297 IRExpr_GetI(ex->Iex.GetI.descr, 298 flatten_Expr(bb, ex->Iex.GetI.ix), 299 ex->Iex.GetI.bias))); 300 return IRExpr_RdTmp(t1); 301 302 case Iex_Get: 303 t1 = newIRTemp(bb->tyenv, ty); 304 addStmtToIRSB(bb, 305 IRStmt_WrTmp(t1, ex)); 306 return IRExpr_RdTmp(t1); 307 308 case Iex_Qop: { 309 IRQop* qop = ex->Iex.Qop.details; 310 t1 = newIRTemp(bb->tyenv, ty); 311 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 312 IRExpr_Qop(qop->op, 313 flatten_Expr(bb, qop->arg1), 314 flatten_Expr(bb, qop->arg2), 315 flatten_Expr(bb, qop->arg3), 316 flatten_Expr(bb, qop->arg4)))); 317 return IRExpr_RdTmp(t1); 318 } 319 320 case Iex_Triop: { 321 IRTriop* triop = ex->Iex.Triop.details; 322 t1 = newIRTemp(bb->tyenv, ty); 323 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 324 IRExpr_Triop(triop->op, 325 flatten_Expr(bb, triop->arg1), 326 flatten_Expr(bb, triop->arg2), 327 flatten_Expr(bb, triop->arg3)))); 328 return IRExpr_RdTmp(t1); 329 } 330 331 case Iex_Binop: 332 t1 = newIRTemp(bb->tyenv, ty); 333 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 334 IRExpr_Binop(ex->Iex.Binop.op, 335 flatten_Expr(bb, ex->Iex.Binop.arg1), 336 flatten_Expr(bb, ex->Iex.Binop.arg2)))); 337 return IRExpr_RdTmp(t1); 338 339 case Iex_Unop: 340 t1 = newIRTemp(bb->tyenv, ty); 341 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 342 IRExpr_Unop(ex->Iex.Unop.op, 343 flatten_Expr(bb, ex->Iex.Unop.arg)))); 344 return IRExpr_RdTmp(t1); 345 346 case Iex_Load: 347 t1 = newIRTemp(bb->tyenv, ty); 348 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 349 IRExpr_Load(ex->Iex.Load.end, 350 ex->Iex.Load.ty, 351 flatten_Expr(bb, ex->Iex.Load.addr)))); 352 return IRExpr_RdTmp(t1); 353 354 case Iex_CCall: 355 newargs = shallowCopyIRExprVec(ex->Iex.CCall.args); 356 for (i = 0; newargs[i]; i++) 357 newargs[i] = flatten_Expr(bb, newargs[i]); 358 t1 = newIRTemp(bb->tyenv, ty); 359 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 360 IRExpr_CCall(ex->Iex.CCall.cee, 361 ex->Iex.CCall.retty, 362 newargs))); 363 return IRExpr_RdTmp(t1); 364 365 case Iex_ITE: 366 t1 = newIRTemp(bb->tyenv, ty); 367 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 368 IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond), 369 flatten_Expr(bb, ex->Iex.ITE.iftrue), 370 flatten_Expr(bb, ex->Iex.ITE.iffalse)))); 371 return IRExpr_RdTmp(t1); 372 373 case Iex_Const: 374 /* Lift F64i constants out onto temps so they can be CSEd 375 later. */ 376 if (ex->Iex.Const.con->tag == Ico_F64i) { 377 t1 = newIRTemp(bb->tyenv, ty); 378 addStmtToIRSB(bb, IRStmt_WrTmp(t1, 379 IRExpr_Const(ex->Iex.Const.con))); 380 return IRExpr_RdTmp(t1); 381 } else { 382 /* Leave all other constants alone. */ 383 return ex; 384 } 385 386 case Iex_RdTmp: 387 return ex; 388 389 default: 390 vex_printf("\n"); 391 ppIRExpr(ex); 392 vex_printf("\n"); 393 vpanic("flatten_Expr"); 394 } 395 } 396 397 398 /* Append a completely flattened form of 'st' to the end of 'bb'. */ 399 400 static void flatten_Stmt ( IRSB* bb, IRStmt* st ) 401 { 402 Int i; 403 IRExpr *e1, *e2, *e3, *e4, *e5; 404 IRDirty *d, *d2; 405 IRCAS *cas, *cas2; 406 IRPutI *puti, *puti2; 407 IRLoadG *lg; 408 IRStoreG *sg; 409 switch (st->tag) { 410 case Ist_Put: 411 if (isIRAtom(st->Ist.Put.data)) { 412 /* optimisation to reduce the amount of heap wasted 413 by the flattener */ 414 addStmtToIRSB(bb, st); 415 } else { 416 /* general case, always correct */ 417 e1 = flatten_Expr(bb, st->Ist.Put.data); 418 addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1)); 419 } 420 break; 421 case Ist_PutI: 422 puti = st->Ist.PutI.details; 423 e1 = flatten_Expr(bb, puti->ix); 424 e2 = flatten_Expr(bb, puti->data); 425 puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2); 426 addStmtToIRSB(bb, IRStmt_PutI(puti2)); 427 break; 428 case Ist_WrTmp: 429 if (isFlat(st->Ist.WrTmp.data)) { 430 /* optimisation, to reduce the number of tmp-tmp 431 copies generated */ 432 addStmtToIRSB(bb, st); 433 } else { 434 /* general case, always correct */ 435 e1 = flatten_Expr(bb, st->Ist.WrTmp.data); 436 addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1)); 437 } 438 break; 439 case Ist_Store: 440 e1 = flatten_Expr(bb, st->Ist.Store.addr); 441 e2 = flatten_Expr(bb, st->Ist.Store.data); 442 addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2)); 443 break; 444 case Ist_StoreG: 445 sg = st->Ist.StoreG.details; 446 e1 = flatten_Expr(bb, sg->addr); 447 e2 = flatten_Expr(bb, sg->data); 448 e3 = flatten_Expr(bb, sg->guard); 449 addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3)); 450 break; 451 case Ist_LoadG: 452 lg = st->Ist.LoadG.details; 453 e1 = flatten_Expr(bb, lg->addr); 454 e2 = flatten_Expr(bb, lg->alt); 455 e3 = flatten_Expr(bb, lg->guard); 456 addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst, 457 e1, e2, e3)); 458 break; 459 case Ist_CAS: 460 cas = st->Ist.CAS.details; 461 e1 = flatten_Expr(bb, cas->addr); 462 e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL; 463 e3 = flatten_Expr(bb, cas->expdLo); 464 e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL; 465 e5 = flatten_Expr(bb, cas->dataLo); 466 cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end, 467 e1, e2, e3, e4, e5 ); 468 addStmtToIRSB(bb, IRStmt_CAS(cas2)); 469 break; 470 case Ist_LLSC: 471 e1 = flatten_Expr(bb, st->Ist.LLSC.addr); 472 e2 = st->Ist.LLSC.storedata 473 ? flatten_Expr(bb, st->Ist.LLSC.storedata) 474 : NULL; 475 addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end, 476 st->Ist.LLSC.result, e1, e2)); 477 break; 478 case Ist_Dirty: 479 d = st->Ist.Dirty.details; 480 d2 = emptyIRDirty(); 481 *d2 = *d; 482 d2->args = shallowCopyIRExprVec(d2->args); 483 if (d2->mFx != Ifx_None) { 484 d2->mAddr = flatten_Expr(bb, d2->mAddr); 485 } else { 486 vassert(d2->mAddr == NULL); 487 } 488 d2->guard = flatten_Expr(bb, d2->guard); 489 for (i = 0; d2->args[i]; i++) { 490 IRExpr* arg = d2->args[i]; 491 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 492 d2->args[i] = flatten_Expr(bb, arg); 493 } 494 addStmtToIRSB(bb, IRStmt_Dirty(d2)); 495 break; 496 case Ist_NoOp: 497 case Ist_MBE: 498 case Ist_IMark: 499 addStmtToIRSB(bb, st); 500 break; 501 case Ist_AbiHint: 502 e1 = flatten_Expr(bb, st->Ist.AbiHint.base); 503 e2 = flatten_Expr(bb, st->Ist.AbiHint.nia); 504 addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2)); 505 break; 506 case Ist_Exit: 507 e1 = flatten_Expr(bb, st->Ist.Exit.guard); 508 addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk, 509 st->Ist.Exit.dst, 510 st->Ist.Exit.offsIP)); 511 break; 512 default: 513 vex_printf("\n"); 514 ppIRStmt(st); 515 vex_printf("\n"); 516 vpanic("flatten_Stmt"); 517 } 518 } 519 520 521 static IRSB* flatten_BB ( IRSB* in ) 522 { 523 Int i; 524 IRSB* out; 525 out = emptyIRSB(); 526 out->tyenv = deepCopyIRTypeEnv( in->tyenv ); 527 for (i = 0; i < in->stmts_used; i++) 528 if (in->stmts[i]) 529 flatten_Stmt( out, in->stmts[i] ); 530 out->next = flatten_Expr( out, in->next ); 531 out->jumpkind = in->jumpkind; 532 out->offsIP = in->offsIP; 533 return out; 534 } 535 536 537 /*---------------------------------------------------------------*/ 538 /*--- In-place removal of redundant GETs ---*/ 539 /*---------------------------------------------------------------*/ 540 541 /* Scan forwards, building up an environment binding (min offset, max 542 offset) pairs to values, which will either be temps or constants. 543 544 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the 545 env and if it matches, replace the Get with the stored value. If 546 there is no match, add a (minoff,maxoff) :-> t binding. 547 548 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env 549 any binding which fully or partially overlaps with (minoff,maxoff). 550 Then add a new (minoff,maxoff) :-> t or c binding. */ 551 552 /* Extract the min/max offsets from a guest state array descriptor. */ 553 554 inline 555 static void getArrayBounds ( IRRegArray* descr, 556 UInt* minoff, UInt* maxoff ) 557 { 558 *minoff = descr->base; 559 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1; 560 vassert((*minoff & ~0xFFFF) == 0); 561 vassert((*maxoff & ~0xFFFF) == 0); 562 vassert(*minoff <= *maxoff); 563 } 564 565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */ 566 567 static UInt mk_key_GetPut ( Int offset, IRType ty ) 568 { 569 /* offset should fit in 16 bits. */ 570 UInt minoff = offset; 571 UInt maxoff = minoff + sizeofIRType(ty) - 1; 572 vassert((minoff & ~0xFFFF) == 0); 573 vassert((maxoff & ~0xFFFF) == 0); 574 return (minoff << 16) | maxoff; 575 } 576 577 static UInt mk_key_GetIPutI ( IRRegArray* descr ) 578 { 579 UInt minoff, maxoff; 580 getArrayBounds( descr, &minoff, &maxoff ); 581 vassert((minoff & ~0xFFFF) == 0); 582 vassert((maxoff & ~0xFFFF) == 0); 583 return (minoff << 16) | maxoff; 584 } 585 586 /* Supposing h has keys of the form generated by mk_key_GetPut and 587 mk_key_GetIPutI, invalidate any key which overlaps (k_lo 588 .. k_hi). 589 */ 590 static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi ) 591 { 592 Int j; 593 UInt e_lo, e_hi; 594 vassert(k_lo <= k_hi); 595 /* invalidate any env entries which in any way overlap (k_lo 596 .. k_hi) */ 597 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */ 598 599 for (j = 0; j < h->used; j++) { 600 if (!h->inuse[j]) 601 continue; 602 e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF; 603 e_hi = ((UInt)h->key[j]) & 0xFFFF; 604 vassert(e_lo <= e_hi); 605 if (e_hi < k_lo || k_hi < e_lo) 606 continue; /* no overlap possible */ 607 else 608 /* overlap; invalidate */ 609 h->inuse[j] = False; 610 } 611 } 612 613 614 static void redundant_get_removal_BB ( IRSB* bb ) 615 { 616 HashHW* env = newHHW(); 617 UInt key = 0; /* keep gcc -O happy */ 618 Int i, j; 619 HWord val; 620 621 for (i = 0; i < bb->stmts_used; i++) { 622 IRStmt* st = bb->stmts[i]; 623 624 if (st->tag == Ist_NoOp) 625 continue; 626 627 /* Deal with Gets */ 628 if (st->tag == Ist_WrTmp 629 && st->Ist.WrTmp.data->tag == Iex_Get) { 630 /* st is 't = Get(...)'. Look up in the environment and see 631 if the Get can be replaced. */ 632 IRExpr* get = st->Ist.WrTmp.data; 633 key = (HWord)mk_key_GetPut( get->Iex.Get.offset, 634 get->Iex.Get.ty ); 635 if (lookupHHW(env, &val, (HWord)key)) { 636 /* found it */ 637 /* Note, we could do better here. If the types are 638 different we don't do the substitution, since doing so 639 could lead to invalidly-typed IR. An improvement would 640 be to stick in a reinterpret-style cast, although that 641 would make maintaining flatness more difficult. */ 642 IRExpr* valE = (IRExpr*)val; 643 Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE) 644 == st->Ist.WrTmp.data->Iex.Get.ty ); 645 if (typesOK && DEBUG_IROPT) { 646 vex_printf("rGET: "); ppIRExpr(get); 647 vex_printf(" -> "); ppIRExpr(valE); 648 vex_printf("\n"); 649 } 650 if (typesOK) 651 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE); 652 } else { 653 /* Not found, but at least we know that t and the Get(...) 654 are now associated. So add a binding to reflect that 655 fact. */ 656 addToHHW( env, (HWord)key, 657 (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) ); 658 } 659 } 660 661 /* Deal with Puts: invalidate any env entries overlapped by this 662 Put */ 663 if (st->tag == Ist_Put || st->tag == Ist_PutI) { 664 UInt k_lo, k_hi; 665 if (st->tag == Ist_Put) { 666 key = mk_key_GetPut( st->Ist.Put.offset, 667 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 668 } else { 669 vassert(st->tag == Ist_PutI); 670 key = mk_key_GetIPutI( st->Ist.PutI.details->descr ); 671 } 672 673 k_lo = (key >> 16) & 0xFFFF; 674 k_hi = key & 0xFFFF; 675 invalidateOverlaps(env, k_lo, k_hi); 676 } 677 else 678 if (st->tag == Ist_Dirty) { 679 /* Deal with dirty helpers which write or modify guest state. 680 Invalidate the entire env. We could do a lot better 681 here. */ 682 IRDirty* d = st->Ist.Dirty.details; 683 Bool writes = False; 684 for (j = 0; j < d->nFxState; j++) { 685 if (d->fxState[j].fx == Ifx_Modify 686 || d->fxState[j].fx == Ifx_Write) 687 writes = True; 688 } 689 if (writes) { 690 /* dump the entire env (not clever, but correct ...) */ 691 for (j = 0; j < env->used; j++) 692 env->inuse[j] = False; 693 if (0) vex_printf("rGET: trash env due to dirty helper\n"); 694 } 695 } 696 697 /* add this one to the env, if appropriate */ 698 if (st->tag == Ist_Put) { 699 vassert(isIRAtom(st->Ist.Put.data)); 700 addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data)); 701 } 702 703 } /* for (i = 0; i < bb->stmts_used; i++) */ 704 705 } 706 707 708 /*---------------------------------------------------------------*/ 709 /*--- In-place removal of redundant PUTs ---*/ 710 /*---------------------------------------------------------------*/ 711 712 /* Find any Get uses in st and invalidate any partially or fully 713 overlapping ranges listed in env. Due to the flattening phase, the 714 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */ 715 716 static void handle_gets_Stmt ( 717 HashHW* env, 718 IRStmt* st, 719 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 720 VexRegisterUpdates pxControl 721 ) 722 { 723 Int j; 724 UInt key = 0; /* keep gcc -O happy */ 725 Bool isGet; 726 Bool memRW = False; 727 IRExpr* e; 728 729 switch (st->tag) { 730 731 /* This is the only interesting case. Deal with Gets in the RHS 732 expression. */ 733 case Ist_WrTmp: 734 e = st->Ist.WrTmp.data; 735 switch (e->tag) { 736 case Iex_Get: 737 isGet = True; 738 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty ); 739 break; 740 case Iex_GetI: 741 isGet = True; 742 key = mk_key_GetIPutI ( e->Iex.GetI.descr ); 743 break; 744 case Iex_Load: 745 isGet = False; 746 memRW = True; 747 break; 748 default: 749 isGet = False; 750 } 751 if (isGet) { 752 UInt k_lo, k_hi; 753 k_lo = (key >> 16) & 0xFFFF; 754 k_hi = key & 0xFFFF; 755 invalidateOverlaps(env, k_lo, k_hi); 756 } 757 break; 758 759 /* Be very conservative for dirty helper calls; dump the entire 760 environment. The helper might read guest state, in which 761 case it needs to be flushed first. Also, the helper might 762 access guest memory, in which case all parts of the guest 763 state requiring precise exceptions needs to be flushed. The 764 crude solution is just to flush everything; we could easily 765 enough do a lot better if needed. */ 766 /* Probably also overly-conservative, but also dump everything 767 if we hit a memory bus event (fence, lock, unlock). Ditto 768 AbiHints, CASs, LLs and SCs. */ 769 case Ist_AbiHint: 770 vassert(isIRAtom(st->Ist.AbiHint.base)); 771 vassert(isIRAtom(st->Ist.AbiHint.nia)); 772 /* fall through */ 773 case Ist_MBE: 774 case Ist_Dirty: 775 case Ist_CAS: 776 case Ist_LLSC: 777 for (j = 0; j < env->used; j++) 778 env->inuse[j] = False; 779 break; 780 781 /* all other cases are boring. */ 782 case Ist_Store: 783 vassert(isIRAtom(st->Ist.Store.addr)); 784 vassert(isIRAtom(st->Ist.Store.data)); 785 memRW = True; 786 break; 787 case Ist_StoreG: { 788 IRStoreG* sg = st->Ist.StoreG.details; 789 vassert(isIRAtom(sg->addr)); 790 vassert(isIRAtom(sg->data)); 791 vassert(isIRAtom(sg->guard)); 792 memRW = True; 793 break; 794 } 795 case Ist_LoadG: { 796 IRLoadG* lg = st->Ist.LoadG.details; 797 vassert(isIRAtom(lg->addr)); 798 vassert(isIRAtom(lg->alt)); 799 vassert(isIRAtom(lg->guard)); 800 memRW = True; 801 break; 802 } 803 case Ist_Exit: 804 vassert(isIRAtom(st->Ist.Exit.guard)); 805 break; 806 807 case Ist_Put: 808 vassert(isIRAtom(st->Ist.Put.data)); 809 break; 810 811 case Ist_PutI: 812 vassert(isIRAtom(st->Ist.PutI.details->ix)); 813 vassert(isIRAtom(st->Ist.PutI.details->data)); 814 break; 815 816 case Ist_NoOp: 817 case Ist_IMark: 818 break; 819 820 default: 821 vex_printf("\n"); 822 ppIRStmt(st); 823 vex_printf("\n"); 824 vpanic("handle_gets_Stmt"); 825 } 826 827 if (memRW) { 828 /* This statement accesses memory. So we might need to dump all parts 829 of the environment corresponding to guest state that may not 830 be reordered with respect to memory references. That means 831 at least the stack pointer. */ 832 switch (pxControl) { 833 case VexRegUpdAllregsAtMemAccess: 834 /* Precise exceptions required at mem access. 835 Flush all guest state. */ 836 for (j = 0; j < env->used; j++) 837 env->inuse[j] = False; 838 break; 839 case VexRegUpdSpAtMemAccess: 840 /* We need to dump the stack pointer 841 (needed for stack extension in m_signals.c). 842 preciseMemExnsFn will use vex_control.iropt_register_updates 843 to verify only the sp is to be checked. */ 844 /* fallthrough */ 845 case VexRegUpdUnwindregsAtMemAccess: 846 for (j = 0; j < env->used; j++) { 847 if (!env->inuse[j]) 848 continue; 849 /* Just flush the minimal amount required, as computed by 850 preciseMemExnsFn. */ 851 HWord k_lo = (env->key[j] >> 16) & 0xFFFF; 852 HWord k_hi = env->key[j] & 0xFFFF; 853 if (preciseMemExnsFn( k_lo, k_hi, pxControl )) 854 env->inuse[j] = False; 855 } 856 break; 857 case VexRegUpdAllregsAtEachInsn: 858 // VexRegUpdAllregsAtEachInsn cannot happen here. 859 // fall through 860 case VexRegUpd_INVALID: 861 default: 862 vassert(0); 863 } 864 } /* if (memRW) */ 865 866 } 867 868 869 /* Scan backwards, building up a set of (min offset, max 870 offset) pairs, indicating those parts of the guest state 871 for which the next event is a write. 872 873 On seeing a conditional exit, empty the set. 874 875 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is 876 completely within the set, remove the Put. Otherwise, add 877 (minoff,maxoff) to the set. 878 879 On seeing 'Get (minoff,maxoff)', remove any part of the set 880 overlapping (minoff,maxoff). The same has to happen for any events 881 which implicitly read parts of the guest state: dirty helper calls 882 and loads/stores. 883 */ 884 885 static void redundant_put_removal_BB ( 886 IRSB* bb, 887 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 888 VexRegisterUpdates pxControl 889 ) 890 { 891 Int i, j; 892 Bool isPut; 893 IRStmt* st; 894 UInt key = 0; /* keep gcc -O happy */ 895 896 vassert(pxControl < VexRegUpdAllregsAtEachInsn); 897 898 HashHW* env = newHHW(); 899 900 /* Initialise the running env with the fact that the final exit 901 writes the IP (or, whatever it claims to write. We don't 902 care.) */ 903 key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next)); 904 addToHHW(env, (HWord)key, 0); 905 906 /* And now scan backwards through the statements. */ 907 for (i = bb->stmts_used-1; i >= 0; i--) { 908 st = bb->stmts[i]; 909 910 if (st->tag == Ist_NoOp) 911 continue; 912 913 /* Deal with conditional exits. */ 914 if (st->tag == Ist_Exit) { 915 //Bool re_add; 916 /* Need to throw out from the env, any part of it which 917 doesn't overlap with the guest state written by this exit. 918 Since the exit only writes one section, it's simplest to 919 do this: (1) check whether env contains a write that 920 completely overlaps the write done by this exit; (2) empty 921 out env; and (3) if (1) was true, add the write done by 922 this exit. 923 924 To make (1) a bit simpler, merely search for a write that 925 exactly matches the one done by this exit. That's safe 926 because it will fail as often or more often than a full 927 overlap check, and failure to find an overlapping write in 928 env is the safe case (we just nuke env if that 929 happens). */ 930 //vassert(isIRAtom(st->Ist.Exit.guard)); 931 /* (1) */ 932 //key = mk_key_GetPut(st->Ist.Exit.offsIP, 933 // typeOfIRConst(st->Ist.Exit.dst)); 934 //re_add = lookupHHW(env, NULL, key); 935 /* (2) */ 936 for (j = 0; j < env->used; j++) 937 env->inuse[j] = False; 938 /* (3) */ 939 //if (0 && re_add) 940 // addToHHW(env, (HWord)key, 0); 941 continue; 942 } 943 944 /* Deal with Puts */ 945 switch (st->tag) { 946 case Ist_Put: 947 isPut = True; 948 key = mk_key_GetPut( st->Ist.Put.offset, 949 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 950 vassert(isIRAtom(st->Ist.Put.data)); 951 break; 952 case Ist_PutI: 953 isPut = True; 954 key = mk_key_GetIPutI( st->Ist.PutI.details->descr ); 955 vassert(isIRAtom(st->Ist.PutI.details->ix)); 956 vassert(isIRAtom(st->Ist.PutI.details->data)); 957 break; 958 default: 959 isPut = False; 960 } 961 if (isPut && st->tag != Ist_PutI) { 962 /* See if any single entry in env overlaps this Put. This is 963 simplistic in that the transformation is valid if, say, two 964 or more entries in the env overlap this Put, but the use of 965 lookupHHW will only find a single entry which exactly 966 overlaps this Put. This is suboptimal but safe. */ 967 if (lookupHHW(env, NULL, (HWord)key)) { 968 /* This Put is redundant because a later one will overwrite 969 it. So NULL (nop) it out. */ 970 if (DEBUG_IROPT) { 971 vex_printf("rPUT: "); ppIRStmt(st); 972 vex_printf("\n"); 973 } 974 bb->stmts[i] = IRStmt_NoOp(); 975 } else { 976 /* We can't demonstrate that this Put is redundant, so add it 977 to the running collection. */ 978 addToHHW(env, (HWord)key, 0); 979 } 980 continue; 981 } 982 983 /* Deal with Gets. These remove bits of the environment since 984 appearance of a Get means that the next event for that slice 985 of the guest state is no longer a write, but a read. Also 986 deals with implicit reads of guest state needed to maintain 987 precise exceptions. */ 988 handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl ); 989 } 990 } 991 992 993 /*---------------------------------------------------------------*/ 994 /*--- Constant propagation and folding ---*/ 995 /*---------------------------------------------------------------*/ 996 997 #if STATS_IROPT 998 /* How often sameIRExprs was invoked */ 999 static UInt invocation_count; 1000 /* How often sameIRExprs recursed through IRTemp assignments */ 1001 static UInt recursion_count; 1002 /* How often sameIRExprs found identical IRExprs */ 1003 static UInt success_count; 1004 /* How often recursing through assignments to IRTemps helped 1005 establishing equality. */ 1006 static UInt recursion_success_count; 1007 /* Whether or not recursing through an IRTemp assignment helped 1008 establishing IRExpr equality for a given sameIRExprs invocation. */ 1009 static Bool recursion_helped; 1010 /* Whether or not a given sameIRExprs invocation recursed through an 1011 IRTemp assignment */ 1012 static Bool recursed; 1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */ 1014 static UInt max_nodes_visited; 1015 #endif /* STATS_IROPT */ 1016 1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */ 1018 static UInt num_nodes_visited; 1019 1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs. 1021 This is to guard against performance degradation by visiting large 1022 trees without success. */ 1023 #define NODE_LIMIT 30 1024 1025 1026 /* The env in this section is a map from IRTemp to IRExpr*, 1027 that is, an array indexed by IRTemp. */ 1028 1029 /* Do both expressions compute the same value? The answer is generally 1030 conservative, i.e. it will report that the expressions do not compute 1031 the same value when in fact they do. The reason is that we do not 1032 keep track of changes in the guest state and memory. Thusly, two 1033 Get's, GetI's or Load's, even when accessing the same location, will be 1034 assumed to compute different values. After all the accesses may happen 1035 at different times and the guest state / memory can have changed in 1036 the meantime. 1037 1038 XXX IMPORTANT XXX the two expressions must have the same IR type. 1039 DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */ 1040 1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable 1042 wrapper that deals with the common tags-don't-match case, and a 1043 slower out of line general case. Saves a few insns. */ 1044 1045 __attribute__((noinline)) 1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ); 1047 1048 inline 1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) 1050 { 1051 if (e1->tag != e2->tag) return False; 1052 return sameIRExprs_aux2(env, e1, e2); 1053 } 1054 1055 __attribute__((noinline)) 1056 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) 1057 { 1058 if (num_nodes_visited++ > NODE_LIMIT) return False; 1059 1060 switch (e1->tag) { 1061 case Iex_RdTmp: 1062 if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True; 1063 1064 if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) { 1065 Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp], 1066 env[e2->Iex.RdTmp.tmp]); 1067 #if STATS_IROPT 1068 recursed = True; 1069 if (same) recursion_helped = True; 1070 #endif 1071 return same; 1072 } 1073 return False; 1074 1075 case Iex_Get: 1076 case Iex_GetI: 1077 case Iex_Load: 1078 /* Guest state / memory could have changed in the meantime. */ 1079 return False; 1080 1081 case Iex_Binop: 1082 return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op 1083 && sameIRExprs_aux( env, e1->Iex.Binop.arg1, 1084 e2->Iex.Binop.arg1 ) 1085 && sameIRExprs_aux( env, e1->Iex.Binop.arg2, 1086 e2->Iex.Binop.arg2 )); 1087 1088 case Iex_Unop: 1089 return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op 1090 && sameIRExprs_aux( env, e1->Iex.Unop.arg, 1091 e2->Iex.Unop.arg )); 1092 1093 case Iex_Const: { 1094 IRConst *c1 = e1->Iex.Const.con; 1095 IRConst *c2 = e2->Iex.Const.con; 1096 vassert(c1->tag == c2->tag); 1097 switch (c1->tag) { 1098 case Ico_U1: return toBool( c1->Ico.U1 == c2->Ico.U1 ); 1099 case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 ); 1100 case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 ); 1101 case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 ); 1102 case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 ); 1103 default: break; 1104 } 1105 return False; 1106 } 1107 1108 case Iex_Triop: { 1109 IRTriop *tri1 = e1->Iex.Triop.details; 1110 IRTriop *tri2 = e2->Iex.Triop.details; 1111 return toBool( tri1->op == tri2->op 1112 && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 ) 1113 && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 ) 1114 && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 )); 1115 } 1116 1117 case Iex_ITE: 1118 return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond, 1119 e2->Iex.ITE.cond ) 1120 && sameIRExprs_aux( env, e1->Iex.ITE.iftrue, 1121 e2->Iex.ITE.iftrue ) 1122 && sameIRExprs_aux( env, e1->Iex.ITE.iffalse, 1123 e2->Iex.ITE.iffalse )); 1124 1125 default: 1126 /* Not very likely to be "same". */ 1127 break; 1128 } 1129 1130 return False; 1131 } 1132 1133 inline 1134 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) 1135 { 1136 Bool same; 1137 1138 num_nodes_visited = 0; 1139 same = sameIRExprs_aux(env, e1, e2); 1140 1141 #if STATS_IROPT 1142 ++invocation_count; 1143 if (recursed) ++recursion_count; 1144 success_count += same; 1145 if (same && recursion_helped) 1146 ++recursion_success_count; 1147 if (num_nodes_visited > max_nodes_visited) 1148 max_nodes_visited = num_nodes_visited; 1149 recursed = False; /* reset */ 1150 recursion_helped = False; /* reset */ 1151 #endif /* STATS_IROPT */ 1152 1153 return same; 1154 } 1155 1156 1157 /* Debugging-only hack (not used in production runs): make a guess 1158 whether sameIRExprs might assert due to the two args being of 1159 different types. If in doubt return False. Is only used when 1160 --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0. 1161 Bad because it duplicates functionality from typeOfIRExpr. See 1162 comment on the single use point below for rationale. */ 1163 static 1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 ) 1165 { 1166 if (e1->tag != e2->tag) return False; 1167 switch (e1->tag) { 1168 case Iex_Const: { 1169 /* The only interesting case */ 1170 IRConst *c1 = e1->Iex.Const.con; 1171 IRConst *c2 = e2->Iex.Const.con; 1172 return c1->tag != c2->tag; 1173 } 1174 default: 1175 break; 1176 } 1177 return False; 1178 } 1179 1180 1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */ 1182 static Bool isZeroU32 ( IRExpr* e ) 1183 { 1184 return toBool( e->tag == Iex_Const 1185 && e->Iex.Const.con->tag == Ico_U32 1186 && e->Iex.Const.con->Ico.U32 == 0); 1187 } 1188 1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ? 1190 Currently unused; commented out to avoid compiler warning */ 1191 #if 0 1192 static Bool isZeroU64 ( IRExpr* e ) 1193 { 1194 return toBool( e->tag == Iex_Const 1195 && e->Iex.Const.con->tag == Ico_U64 1196 && e->Iex.Const.con->Ico.U64 == 0); 1197 } 1198 #endif 1199 1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */ 1201 static Bool isZeroV128 ( IRExpr* e ) 1202 { 1203 return toBool( e->tag == Iex_Const 1204 && e->Iex.Const.con->tag == Ico_V128 1205 && e->Iex.Const.con->Ico.V128 == 0x0000); 1206 } 1207 1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */ 1209 static Bool isZeroV256 ( IRExpr* e ) 1210 { 1211 return toBool( e->tag == Iex_Const 1212 && e->Iex.Const.con->tag == Ico_V256 1213 && e->Iex.Const.con->Ico.V256 == 0x00000000); 1214 } 1215 1216 /* Is this an integer constant with value 0 ? */ 1217 static Bool isZeroU ( IRExpr* e ) 1218 { 1219 if (e->tag != Iex_Const) return False; 1220 switch (e->Iex.Const.con->tag) { 1221 case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == 0); 1222 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0); 1223 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0); 1224 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0); 1225 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0); 1226 default: vpanic("isZeroU"); 1227 } 1228 } 1229 1230 /* Is this an integer constant with value 1---1b ? */ 1231 static Bool isOnesU ( IRExpr* e ) 1232 { 1233 if (e->tag != Iex_Const) return False; 1234 switch (e->Iex.Const.con->tag) { 1235 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0xFF); 1236 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF); 1237 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 1238 == 0xFFFFFFFF); 1239 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 1240 == 0xFFFFFFFFFFFFFFFFULL); 1241 default: ppIRExpr(e); vpanic("isOnesU"); 1242 } 1243 } 1244 1245 static Bool notBool ( Bool b ) 1246 { 1247 if (b == True) return False; 1248 if (b == False) return True; 1249 vpanic("notBool"); 1250 } 1251 1252 /* Make a zero which has the same type as the result of the given 1253 primop. */ 1254 static IRExpr* mkZeroOfPrimopResultType ( IROp op ) 1255 { 1256 switch (op) { 1257 case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0))); 1258 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0)); 1259 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0)); 1260 case Iop_Sub32: 1261 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0)); 1262 case Iop_And64: 1263 case Iop_Sub64: 1264 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0)); 1265 case Iop_XorV128: 1266 case Iop_AndV128: return IRExpr_Const(IRConst_V128(0)); 1267 case Iop_AndV256: return IRExpr_Const(IRConst_V256(0)); 1268 default: vpanic("mkZeroOfPrimopResultType: bad primop"); 1269 } 1270 } 1271 1272 /* Make a value containing all 1-bits, which has the same type as the 1273 result of the given primop. */ 1274 static IRExpr* mkOnesOfPrimopResultType ( IROp op ) 1275 { 1276 switch (op) { 1277 case Iop_CmpEQ32: 1278 case Iop_CmpEQ64: 1279 return IRExpr_Const(IRConst_U1(toBool(1))); 1280 case Iop_Or8: 1281 return IRExpr_Const(IRConst_U8(0xFF)); 1282 case Iop_Or16: 1283 return IRExpr_Const(IRConst_U16(0xFFFF)); 1284 case Iop_Or32: 1285 return IRExpr_Const(IRConst_U32(0xFFFFFFFF)); 1286 case Iop_CmpEQ8x8: 1287 case Iop_Or64: 1288 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL)); 1289 case Iop_CmpEQ8x16: 1290 case Iop_CmpEQ16x8: 1291 case Iop_CmpEQ32x4: 1292 return IRExpr_Const(IRConst_V128(0xFFFF)); 1293 default: 1294 ppIROp(op); 1295 vpanic("mkOnesOfPrimopResultType: bad primop"); 1296 } 1297 } 1298 1299 /* Helpers for folding Clz32/64. */ 1300 static UInt fold_Clz64 ( ULong value ) 1301 { 1302 UInt i; 1303 vassert(value != 0ULL); /* no defined semantics for arg==0 */ 1304 for (i = 0; i < 64; ++i) { 1305 if (0ULL != (value & (((ULong)1) << (63 - i)))) return i; 1306 } 1307 vassert(0); 1308 /*NOTREACHED*/ 1309 return 0; 1310 } 1311 1312 static UInt fold_Clz32 ( UInt value ) 1313 { 1314 UInt i; 1315 vassert(value != 0); /* no defined semantics for arg==0 */ 1316 for (i = 0; i < 32; ++i) { 1317 if (0 != (value & (((UInt)1) << (31 - i)))) return i; 1318 } 1319 vassert(0); 1320 /*NOTREACHED*/ 1321 return 0; 1322 } 1323 1324 /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to 1325 the corresponding real constant. */ 1326 //XXX re-check this before use 1327 //static ULong de_summarise_V64 ( UChar v64 ) 1328 //{ 1329 // ULong r = 0; 1330 // if (v64 & (1<<0)) r |= 0x00000000000000FFULL; 1331 // if (v64 & (1<<1)) r |= 0x000000000000FF00ULL; 1332 // if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL; 1333 // if (v64 & (1<<3)) r |= 0x00000000FF000000ULL; 1334 // if (v64 & (1<<4)) r |= 0x000000FF00000000ULL; 1335 // if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL; 1336 // if (v64 & (1<<6)) r |= 0x00FF000000000000ULL; 1337 // if (v64 & (1<<7)) r |= 0xFF00000000000000ULL; 1338 // return r; 1339 //} 1340 1341 /* Helper for arbitrary expression pattern matching in flat IR. If 1342 'e' is a reference to a tmp, look it up in env -- repeatedly, if 1343 necessary -- until it resolves to a non-tmp. Note that this can 1344 return NULL if it can't resolve 'e' to a new expression, which will 1345 be the case if 'e' is instead defined by an IRStmt (IRDirty or 1346 LLSC). */ 1347 static IRExpr* chase ( IRExpr** env, IRExpr* e ) 1348 { 1349 /* Why is this loop guaranteed to terminate? Because all tmps must 1350 have definitions before use, hence a tmp cannot be bound 1351 (directly or indirectly) to itself. */ 1352 while (e->tag == Iex_RdTmp) { 1353 if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); } 1354 e = env[(Int)e->Iex.RdTmp.tmp]; 1355 if (e == NULL) break; 1356 } 1357 return e; 1358 } 1359 1360 /* Similar to |chase|, but follows at most one level of tmp reference. */ 1361 static IRExpr* chase1 ( IRExpr** env, IRExpr* e ) 1362 { 1363 if (e == NULL || e->tag != Iex_RdTmp) 1364 return e; 1365 else 1366 return env[(Int)e->Iex.RdTmp.tmp]; 1367 } 1368 1369 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) 1370 { 1371 Int shift; 1372 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */ 1373 1374 switch (e->tag) { 1375 case Iex_Unop: 1376 /* UNARY ops */ 1377 if (e->Iex.Unop.arg->tag == Iex_Const) { 1378 switch (e->Iex.Unop.op) { 1379 case Iop_1Uto8: 1380 e2 = IRExpr_Const(IRConst_U8(toUChar( 1381 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1382 ? 1 : 0))); 1383 break; 1384 case Iop_1Uto32: 1385 e2 = IRExpr_Const(IRConst_U32( 1386 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1387 ? 1 : 0)); 1388 break; 1389 case Iop_1Uto64: 1390 e2 = IRExpr_Const(IRConst_U64( 1391 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1392 ? 1 : 0)); 1393 break; 1394 1395 case Iop_1Sto8: 1396 e2 = IRExpr_Const(IRConst_U8(toUChar( 1397 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1398 ? 0xFF : 0))); 1399 break; 1400 case Iop_1Sto16: 1401 e2 = IRExpr_Const(IRConst_U16(toUShort( 1402 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1403 ? 0xFFFF : 0))); 1404 break; 1405 case Iop_1Sto32: 1406 e2 = IRExpr_Const(IRConst_U32( 1407 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1408 ? 0xFFFFFFFF : 0)); 1409 break; 1410 case Iop_1Sto64: 1411 e2 = IRExpr_Const(IRConst_U64( 1412 e->Iex.Unop.arg->Iex.Const.con->Ico.U1 1413 ? 0xFFFFFFFFFFFFFFFFULL : 0)); 1414 break; 1415 1416 case Iop_8Sto32: { 1417 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8; 1418 u32 <<= 24; 1419 u32 = (Int)u32 >> 24; /* signed shift */ 1420 e2 = IRExpr_Const(IRConst_U32(u32)); 1421 break; 1422 } 1423 case Iop_16Sto32: { 1424 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1425 u32 <<= 16; 1426 u32 = (Int)u32 >> 16; /* signed shift */ 1427 e2 = IRExpr_Const(IRConst_U32(u32)); 1428 break; 1429 } 1430 case Iop_8Uto64: 1431 e2 = IRExpr_Const(IRConst_U64( 1432 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1433 break; 1434 case Iop_16Uto64: 1435 e2 = IRExpr_Const(IRConst_U64( 1436 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16)); 1437 break; 1438 case Iop_8Uto32: 1439 e2 = IRExpr_Const(IRConst_U32( 1440 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1441 break; 1442 case Iop_8Sto16: { 1443 UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8; 1444 u16 <<= 8; 1445 u16 = (Short)u16 >> 8; /* signed shift */ 1446 e2 = IRExpr_Const(IRConst_U16(u16)); 1447 break; 1448 } 1449 case Iop_8Uto16: 1450 e2 = IRExpr_Const(IRConst_U16( 1451 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)); 1452 break; 1453 case Iop_16Uto32: 1454 e2 = IRExpr_Const(IRConst_U32( 1455 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16)); 1456 break; 1457 case Iop_32to16: 1458 e2 = IRExpr_Const(IRConst_U16(toUShort( 1459 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1460 break; 1461 case Iop_32to8: 1462 e2 = IRExpr_Const(IRConst_U8(toUChar( 1463 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1464 break; 1465 case Iop_32to1: 1466 e2 = IRExpr_Const(IRConst_U1(toBool( 1467 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32) 1468 ))); 1469 break; 1470 case Iop_64to1: 1471 e2 = IRExpr_Const(IRConst_U1(toBool( 1472 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64) 1473 ))); 1474 break; 1475 1476 case Iop_NotV128: 1477 e2 = IRExpr_Const(IRConst_V128( 1478 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128))); 1479 break; 1480 case Iop_Not64: 1481 e2 = IRExpr_Const(IRConst_U64( 1482 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64))); 1483 break; 1484 case Iop_Not32: 1485 e2 = IRExpr_Const(IRConst_U32( 1486 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); 1487 break; 1488 case Iop_Not16: 1489 e2 = IRExpr_Const(IRConst_U16(toUShort( 1490 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16)))); 1491 break; 1492 case Iop_Not8: 1493 e2 = IRExpr_Const(IRConst_U8(toUChar( 1494 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8)))); 1495 break; 1496 1497 case Iop_Not1: 1498 e2 = IRExpr_Const(IRConst_U1( 1499 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1))); 1500 break; 1501 1502 case Iop_64to8: { 1503 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1504 w64 &= 0xFFULL; 1505 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 )); 1506 break; 1507 } 1508 case Iop_64to16: { 1509 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1510 w64 &= 0xFFFFULL; 1511 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 )); 1512 break; 1513 } 1514 case Iop_64to32: { 1515 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1516 w64 &= 0x00000000FFFFFFFFULL; 1517 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 )); 1518 break; 1519 } 1520 case Iop_64HIto32: { 1521 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1522 w64 >>= 32; 1523 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 )); 1524 break; 1525 } 1526 case Iop_32Uto64: 1527 e2 = IRExpr_Const(IRConst_U64( 1528 0xFFFFFFFFULL 1529 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)); 1530 break; 1531 case Iop_16Sto64: { 1532 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1533 u64 <<= 48; 1534 u64 = (Long)u64 >> 48; /* signed shift */ 1535 e2 = IRExpr_Const(IRConst_U64(u64)); 1536 break; 1537 } 1538 case Iop_32Sto64: { 1539 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1540 u64 <<= 32; 1541 u64 = (Long)u64 >> 32; /* signed shift */ 1542 e2 = IRExpr_Const(IRConst_U64(u64)); 1543 break; 1544 } 1545 1546 case Iop_16to8: { 1547 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1548 w16 &= 0xFF; 1549 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 )); 1550 break; 1551 } 1552 case Iop_16HIto8: { 1553 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16; 1554 w16 >>= 8; 1555 w16 &= 0xFF; 1556 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 )); 1557 break; 1558 } 1559 1560 case Iop_CmpNEZ8: 1561 e2 = IRExpr_Const(IRConst_U1(toBool( 1562 0 != 1563 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8) 1564 ))); 1565 break; 1566 case Iop_CmpNEZ32: 1567 e2 = IRExpr_Const(IRConst_U1(toBool( 1568 0 != 1569 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32) 1570 ))); 1571 break; 1572 case Iop_CmpNEZ64: 1573 e2 = IRExpr_Const(IRConst_U1(toBool( 1574 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64 1575 ))); 1576 break; 1577 1578 case Iop_CmpwNEZ32: { 1579 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1580 if (w32 == 0) 1581 e2 = IRExpr_Const(IRConst_U32( 0 )); 1582 else 1583 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF )); 1584 break; 1585 } 1586 case Iop_CmpwNEZ64: { 1587 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1588 if (w64 == 0) 1589 e2 = IRExpr_Const(IRConst_U64( 0 )); 1590 else 1591 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL )); 1592 break; 1593 } 1594 1595 case Iop_Left32: { 1596 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1597 Int s32 = (Int)(u32 & 0xFFFFFFFF); 1598 s32 = (s32 | (-s32)); 1599 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 )); 1600 break; 1601 } 1602 1603 case Iop_Left64: { 1604 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1605 Long s64 = (Long)u64; 1606 s64 = (s64 | (-s64)); 1607 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 )); 1608 break; 1609 } 1610 1611 case Iop_Clz32: { 1612 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1613 if (u32 != 0) 1614 e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32))); 1615 break; 1616 } 1617 case Iop_Clz64: { 1618 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1619 if (u64 != 0ULL) 1620 e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64))); 1621 break; 1622 } 1623 1624 /* For these vector ones, can't fold all cases, but at least 1625 do the most obvious one. Could do better here using 1626 summarise/desummarise of vector constants, but too 1627 difficult to verify; hence just handle the zero cases. */ 1628 case Iop_32UtoV128: { 1629 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; 1630 if (0 == u32) { 1631 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1632 } else { 1633 goto unhandled; 1634 } 1635 break; 1636 } 1637 case Iop_V128to64: { 1638 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1639 if (0 == ((v128 >> 0) & 0xFF)) { 1640 e2 = IRExpr_Const(IRConst_U64(0)); 1641 } else { 1642 goto unhandled; 1643 } 1644 break; 1645 } 1646 case Iop_V128HIto64: { 1647 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1648 if (0 == ((v128 >> 8) & 0xFF)) { 1649 e2 = IRExpr_Const(IRConst_U64(0)); 1650 } else { 1651 goto unhandled; 1652 } 1653 break; 1654 } 1655 case Iop_64UtoV128: { 1656 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; 1657 if (0 == u64) { 1658 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1659 } else { 1660 goto unhandled; 1661 } 1662 break; 1663 } 1664 1665 /* Even stupider (although still correct ..) */ 1666 case Iop_V256to64_0: case Iop_V256to64_1: 1667 case Iop_V256to64_2: case Iop_V256to64_3: { 1668 UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256; 1669 if (v256 == 0x00000000) { 1670 e2 = IRExpr_Const(IRConst_U64(0)); 1671 } else { 1672 goto unhandled; 1673 } 1674 break; 1675 } 1676 1677 case Iop_ZeroHI64ofV128: { 1678 /* Could do better here -- only need to look at the bottom 64 bits 1679 of the argument, really. */ 1680 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128; 1681 if (v128 == 0x0000) { 1682 e2 = IRExpr_Const(IRConst_V128(0x0000)); 1683 } else { 1684 goto unhandled; 1685 } 1686 break; 1687 } 1688 1689 default: 1690 goto unhandled; 1691 } 1692 } 1693 break; 1694 1695 case Iex_Binop: 1696 /* BINARY ops */ 1697 if (e->Iex.Binop.arg1->tag == Iex_Const 1698 && e->Iex.Binop.arg2->tag == Iex_Const) { 1699 /* cases where both args are consts */ 1700 switch (e->Iex.Binop.op) { 1701 1702 /* -- Or -- */ 1703 case Iop_Or8: 1704 e2 = IRExpr_Const(IRConst_U8(toUChar( 1705 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1706 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1707 break; 1708 case Iop_Or16: 1709 e2 = IRExpr_Const(IRConst_U16(toUShort( 1710 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1711 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1712 break; 1713 case Iop_Or32: 1714 e2 = IRExpr_Const(IRConst_U32( 1715 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1716 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1717 break; 1718 case Iop_Or64: 1719 e2 = IRExpr_Const(IRConst_U64( 1720 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1721 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1722 break; 1723 case Iop_OrV128: 1724 e2 = IRExpr_Const(IRConst_V128( 1725 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1726 | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1727 break; 1728 1729 /* -- Xor -- */ 1730 case Iop_Xor8: 1731 e2 = IRExpr_Const(IRConst_U8(toUChar( 1732 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1733 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1734 break; 1735 case Iop_Xor16: 1736 e2 = IRExpr_Const(IRConst_U16(toUShort( 1737 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1738 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1739 break; 1740 case Iop_Xor32: 1741 e2 = IRExpr_Const(IRConst_U32( 1742 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1743 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1744 break; 1745 case Iop_Xor64: 1746 e2 = IRExpr_Const(IRConst_U64( 1747 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1748 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1749 break; 1750 case Iop_XorV128: 1751 e2 = IRExpr_Const(IRConst_V128( 1752 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1753 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1754 break; 1755 1756 /* -- And -- */ 1757 case Iop_And8: 1758 e2 = IRExpr_Const(IRConst_U8(toUChar( 1759 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1760 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1761 break; 1762 case Iop_And16: 1763 e2 = IRExpr_Const(IRConst_U16(toUShort( 1764 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16 1765 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16)))); 1766 break; 1767 case Iop_And32: 1768 e2 = IRExpr_Const(IRConst_U32( 1769 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1770 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1771 break; 1772 case Iop_And64: 1773 e2 = IRExpr_Const(IRConst_U64( 1774 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1775 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1776 break; 1777 case Iop_AndV128: 1778 e2 = IRExpr_Const(IRConst_V128( 1779 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128 1780 & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128))); 1781 break; 1782 1783 /* -- Add -- */ 1784 case Iop_Add8: 1785 e2 = IRExpr_Const(IRConst_U8(toUChar( 1786 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1787 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1788 break; 1789 case Iop_Add32: 1790 e2 = IRExpr_Const(IRConst_U32( 1791 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1792 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1793 break; 1794 case Iop_Add64: 1795 e2 = IRExpr_Const(IRConst_U64( 1796 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1797 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1798 break; 1799 1800 /* -- Sub -- */ 1801 case Iop_Sub8: 1802 e2 = IRExpr_Const(IRConst_U8(toUChar( 1803 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 1804 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))); 1805 break; 1806 case Iop_Sub32: 1807 e2 = IRExpr_Const(IRConst_U32( 1808 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1809 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1810 break; 1811 case Iop_Sub64: 1812 e2 = IRExpr_Const(IRConst_U64( 1813 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1814 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1815 break; 1816 1817 /* -- Max32U -- */ 1818 case Iop_Max32U: { 1819 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 1820 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1821 UInt res = u32a > u32b ? u32a : u32b; 1822 e2 = IRExpr_Const(IRConst_U32(res)); 1823 break; 1824 } 1825 1826 /* -- Mul -- */ 1827 case Iop_Mul32: 1828 e2 = IRExpr_Const(IRConst_U32( 1829 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1830 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))); 1831 break; 1832 case Iop_Mul64: 1833 e2 = IRExpr_Const(IRConst_U64( 1834 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1835 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))); 1836 break; 1837 1838 case Iop_MullS32: { 1839 /* very paranoid */ 1840 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 1841 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 1842 Int s32a = (Int)u32a; 1843 Int s32b = (Int)u32b; 1844 Long s64a = (Long)s32a; 1845 Long s64b = (Long)s32b; 1846 Long sres = s64a * s64b; 1847 ULong ures = (ULong)sres; 1848 e2 = IRExpr_Const(IRConst_U64(ures)); 1849 break; 1850 } 1851 1852 /* -- Shl -- */ 1853 case Iop_Shl32: 1854 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1855 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1856 if (shift >= 0 && shift <= 31) 1857 e2 = IRExpr_Const(IRConst_U32( 1858 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1859 << shift))); 1860 break; 1861 case Iop_Shl64: 1862 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1863 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1864 if (shift >= 0 && shift <= 63) 1865 e2 = IRExpr_Const(IRConst_U64( 1866 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1867 << shift))); 1868 break; 1869 1870 /* -- Sar -- */ 1871 case Iop_Sar32: { 1872 /* paranoid ... */ 1873 /*signed*/ Int s32; 1874 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1875 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32); 1876 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1877 if (shift >= 0 && shift <= 31) { 1878 s32 >>=/*signed*/ shift; 1879 e2 = IRExpr_Const(IRConst_U32((UInt)s32)); 1880 } 1881 break; 1882 } 1883 case Iop_Sar64: { 1884 /* paranoid ... */ 1885 /*signed*/ Long s64; 1886 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1887 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1888 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1889 if (shift >= 0 && shift <= 63) { 1890 s64 >>=/*signed*/ shift; 1891 e2 = IRExpr_Const(IRConst_U64((ULong)s64)); 1892 } 1893 break; 1894 } 1895 1896 /* -- Shr -- */ 1897 case Iop_Shr32: { 1898 /* paranoid ... */ 1899 /*unsigned*/ UInt u32; 1900 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1901 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32); 1902 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1903 if (shift >= 0 && shift <= 31) { 1904 u32 >>=/*unsigned*/ shift; 1905 e2 = IRExpr_Const(IRConst_U32(u32)); 1906 } 1907 break; 1908 } 1909 case Iop_Shr64: { 1910 /* paranoid ... */ 1911 /*unsigned*/ ULong u64; 1912 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); 1913 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64); 1914 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8); 1915 if (shift >= 0 && shift <= 63) { 1916 u64 >>=/*unsigned*/ shift; 1917 e2 = IRExpr_Const(IRConst_U64(u64)); 1918 } 1919 break; 1920 } 1921 1922 /* -- CmpEQ -- */ 1923 case Iop_CmpEQ32: 1924 e2 = IRExpr_Const(IRConst_U1(toBool( 1925 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1926 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1927 break; 1928 case Iop_CmpEQ64: 1929 e2 = IRExpr_Const(IRConst_U1(toBool( 1930 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1931 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1932 break; 1933 1934 /* -- CmpNE -- */ 1935 case Iop_CmpNE8: 1936 case Iop_CasCmpNE8: 1937 case Iop_ExpCmpNE8: 1938 e2 = IRExpr_Const(IRConst_U1(toBool( 1939 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8) 1940 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))))); 1941 break; 1942 case Iop_CmpNE32: 1943 case Iop_CasCmpNE32: 1944 case Iop_ExpCmpNE32: 1945 e2 = IRExpr_Const(IRConst_U1(toBool( 1946 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 1947 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); 1948 break; 1949 case Iop_CmpNE64: 1950 case Iop_CasCmpNE64: 1951 case Iop_ExpCmpNE64: 1952 e2 = IRExpr_Const(IRConst_U1(toBool( 1953 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 1954 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); 1955 break; 1956 1957 /* -- CmpLEU -- */ 1958 case Iop_CmpLE32U: 1959 e2 = IRExpr_Const(IRConst_U1(toBool( 1960 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1961 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1962 break; 1963 case Iop_CmpLE64U: 1964 e2 = IRExpr_Const(IRConst_U1(toBool( 1965 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1966 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1967 break; 1968 1969 /* -- CmpLES -- */ 1970 case Iop_CmpLE32S: 1971 e2 = IRExpr_Const(IRConst_U1(toBool( 1972 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1973 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1974 break; 1975 case Iop_CmpLE64S: 1976 e2 = IRExpr_Const(IRConst_U1(toBool( 1977 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1978 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1979 break; 1980 1981 /* -- CmpLTS -- */ 1982 case Iop_CmpLT32S: 1983 e2 = IRExpr_Const(IRConst_U1(toBool( 1984 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1985 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1986 break; 1987 case Iop_CmpLT64S: 1988 e2 = IRExpr_Const(IRConst_U1(toBool( 1989 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 1990 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 1991 break; 1992 1993 /* -- CmpLTU -- */ 1994 case Iop_CmpLT32U: 1995 e2 = IRExpr_Const(IRConst_U1(toBool( 1996 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32) 1997 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))))); 1998 break; 1999 case Iop_CmpLT64U: 2000 e2 = IRExpr_Const(IRConst_U1(toBool( 2001 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64) 2002 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))))); 2003 break; 2004 2005 /* -- CmpORD -- */ 2006 case Iop_CmpORD32S: { 2007 /* very paranoid */ 2008 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32; 2009 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; 2010 Int s32a = (Int)u32a; 2011 Int s32b = (Int)u32b; 2012 Int r = 0x2; /* EQ */ 2013 if (s32a < s32b) { 2014 r = 0x8; /* LT */ 2015 } 2016 else if (s32a > s32b) { 2017 r = 0x4; /* GT */ 2018 } 2019 e2 = IRExpr_Const(IRConst_U32(r)); 2020 break; 2021 } 2022 2023 /* -- nHLto2n -- */ 2024 case Iop_32HLto64: 2025 e2 = IRExpr_Const(IRConst_U64( 2026 (((ULong)(e->Iex.Binop.arg1 2027 ->Iex.Const.con->Ico.U32)) << 32) 2028 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)) 2029 )); 2030 break; 2031 case Iop_64HLto128: 2032 /* We can't fold this, because there is no way to 2033 express he result in IR, but at least pretend to 2034 handle it, so as to stop getting blasted with 2035 no-rule-for-this-primop messages. */ 2036 break; 2037 /* For this vector one, can't fold all cases, but at 2038 least do the most obvious one. Could do better here 2039 using summarise/desummarise of vector constants, but 2040 too difficult to verify; hence just handle the zero 2041 cases. */ 2042 case Iop_64HLtoV128: { 2043 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64; 2044 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; 2045 if (0 == argHi && 0 == argLo) { 2046 e2 = IRExpr_Const(IRConst_V128(0)); 2047 } else { 2048 goto unhandled; 2049 } 2050 break; 2051 } 2052 /* Same reasoning for the 256-bit version. */ 2053 case Iop_V128HLtoV256: { 2054 IRExpr* argHi = e->Iex.Binop.arg1; 2055 IRExpr* argLo = e->Iex.Binop.arg2; 2056 if (isZeroV128(argHi) && isZeroV128(argLo)) { 2057 e2 = IRExpr_Const(IRConst_V256(0)); 2058 } else { 2059 goto unhandled; 2060 } 2061 break; 2062 } 2063 2064 /* -- V128 stuff -- */ 2065 case Iop_InterleaveLO8x16: { 2066 /* This turns up a lot in Memcheck instrumentation of 2067 Icc generated code. I don't know why. */ 2068 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128; 2069 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128; 2070 if (0 == arg1 && 0 == arg2) { 2071 e2 = IRExpr_Const(IRConst_V128(0)); 2072 } else { 2073 goto unhandled; 2074 } 2075 break; 2076 } 2077 2078 default: 2079 goto unhandled; 2080 } 2081 2082 } else { 2083 2084 /* other cases (identities, etc) */ 2085 switch (e->Iex.Binop.op) { 2086 2087 case Iop_Shl32: 2088 case Iop_Shl64: 2089 case Iop_Shr64: 2090 case Iop_Sar64: 2091 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */ 2092 if (isZeroU(e->Iex.Binop.arg2)) { 2093 e2 = e->Iex.Binop.arg1; 2094 break; 2095 } 2096 /* Shl32/Shl64/Shr64(0,x) ==> 0 */ 2097 if (isZeroU(e->Iex.Binop.arg1)) { 2098 e2 = e->Iex.Binop.arg1; 2099 break; 2100 } 2101 break; 2102 2103 case Iop_Sar32: 2104 case Iop_Shr32: 2105 /* Shr32/Sar32(x,0) ==> x */ 2106 if (isZeroU(e->Iex.Binop.arg2)) { 2107 e2 = e->Iex.Binop.arg1; 2108 break; 2109 } 2110 break; 2111 2112 case Iop_Or8: 2113 case Iop_Or16: 2114 case Iop_Or32: 2115 case Iop_Or64: 2116 case Iop_Max32U: 2117 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */ 2118 if (isZeroU(e->Iex.Binop.arg2)) { 2119 e2 = e->Iex.Binop.arg1; 2120 break; 2121 } 2122 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */ 2123 if (isZeroU(e->Iex.Binop.arg1)) { 2124 e2 = e->Iex.Binop.arg2; 2125 break; 2126 } 2127 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */ 2128 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */ 2129 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) { 2130 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2131 break; 2132 } 2133 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */ 2134 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2135 e2 = e->Iex.Binop.arg1; 2136 break; 2137 } 2138 break; 2139 2140 case Iop_Add8: 2141 /* Add8(t,t) ==> t << 1. 2142 Memcheck doesn't understand that 2143 x+x produces a defined least significant bit, and it seems 2144 simplest just to get rid of the problem by rewriting it 2145 out, since the opportunity to do so exists. */ 2146 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2147 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1, 2148 IRExpr_Const(IRConst_U8(1))); 2149 break; 2150 } 2151 break; 2152 2153 /* NB no Add16(t,t) case yet as no known test case exists */ 2154 2155 case Iop_Add32: 2156 case Iop_Add64: 2157 /* Add32/Add64(x,0) ==> x */ 2158 if (isZeroU(e->Iex.Binop.arg2)) { 2159 e2 = e->Iex.Binop.arg1; 2160 break; 2161 } 2162 /* Add32/Add64(0,x) ==> x */ 2163 if (isZeroU(e->Iex.Binop.arg1)) { 2164 e2 = e->Iex.Binop.arg2; 2165 break; 2166 } 2167 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */ 2168 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2169 e2 = IRExpr_Binop( 2170 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64, 2171 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1))); 2172 break; 2173 } 2174 break; 2175 2176 case Iop_Sub32: 2177 case Iop_Sub64: 2178 /* Sub32/Sub64(x,0) ==> x */ 2179 if (isZeroU(e->Iex.Binop.arg2)) { 2180 e2 = e->Iex.Binop.arg1; 2181 break; 2182 } 2183 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */ 2184 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2185 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2186 break; 2187 } 2188 break; 2189 case Iop_Sub8x16: 2190 /* Sub8x16(x,0) ==> x */ 2191 if (isZeroV128(e->Iex.Binop.arg2)) { 2192 e2 = e->Iex.Binop.arg1; 2193 break; 2194 } 2195 break; 2196 2197 case Iop_And8: 2198 case Iop_And16: 2199 case Iop_And32: 2200 case Iop_And64: 2201 /* And8/And16/And32/And64(x,1---1b) ==> x */ 2202 if (isOnesU(e->Iex.Binop.arg2)) { 2203 e2 = e->Iex.Binop.arg1; 2204 break; 2205 } 2206 /* And8/And16/And32/And64(1---1b,x) ==> x */ 2207 if (isOnesU(e->Iex.Binop.arg1)) { 2208 e2 = e->Iex.Binop.arg2; 2209 break; 2210 } 2211 /* And8/And16/And32/And64(x,0) ==> 0 */ 2212 if (isZeroU(e->Iex.Binop.arg2)) { 2213 e2 = e->Iex.Binop.arg2; 2214 break; 2215 } 2216 /* And8/And16/And32/And64(0,x) ==> 0 */ 2217 if (isZeroU(e->Iex.Binop.arg1)) { 2218 e2 = e->Iex.Binop.arg1; 2219 break; 2220 } 2221 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */ 2222 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2223 e2 = e->Iex.Binop.arg1; 2224 break; 2225 } 2226 break; 2227 2228 case Iop_AndV128: 2229 case Iop_AndV256: 2230 /* And8/And16/AndV128/AndV256(t,t) 2231 ==> t, for some IRTemp t */ 2232 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2233 e2 = e->Iex.Binop.arg1; 2234 break; 2235 } 2236 /* Deal with either arg zero. Could handle other And 2237 cases here too. */ 2238 if (e->Iex.Binop.op == Iop_AndV256 2239 && (isZeroV256(e->Iex.Binop.arg1) 2240 || isZeroV256(e->Iex.Binop.arg2))) { 2241 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2242 break; 2243 } else if (e->Iex.Binop.op == Iop_AndV128 2244 && (isZeroV128(e->Iex.Binop.arg1) 2245 || isZeroV128(e->Iex.Binop.arg2))) { 2246 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2247 break; 2248 } 2249 break; 2250 2251 case Iop_OrV128: 2252 case Iop_OrV256: 2253 /* V128/V256(t,t) ==> t, for some IRTemp t */ 2254 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2255 e2 = e->Iex.Binop.arg1; 2256 break; 2257 } 2258 /* OrV128(t,0) ==> t */ 2259 if (e->Iex.Binop.op == Iop_OrV128) { 2260 if (isZeroV128(e->Iex.Binop.arg2)) { 2261 e2 = e->Iex.Binop.arg1; 2262 break; 2263 } 2264 if (isZeroV128(e->Iex.Binop.arg1)) { 2265 e2 = e->Iex.Binop.arg2; 2266 break; 2267 } 2268 } 2269 /* OrV256(t,0) ==> t */ 2270 if (e->Iex.Binop.op == Iop_OrV256) { 2271 if (isZeroV256(e->Iex.Binop.arg2)) { 2272 e2 = e->Iex.Binop.arg1; 2273 break; 2274 } 2275 //Disabled because there's no known test case right now. 2276 //if (isZeroV256(e->Iex.Binop.arg1)) { 2277 // e2 = e->Iex.Binop.arg2; 2278 // break; 2279 //} 2280 } 2281 break; 2282 2283 case Iop_Xor8: 2284 case Iop_Xor16: 2285 case Iop_Xor32: 2286 case Iop_Xor64: 2287 case Iop_XorV128: 2288 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */ 2289 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2290 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2291 break; 2292 } 2293 /* XorV128(t,0) ==> t */ 2294 if (e->Iex.Binop.op == Iop_XorV128) { 2295 if (isZeroV128(e->Iex.Binop.arg2)) { 2296 e2 = e->Iex.Binop.arg1; 2297 break; 2298 } 2299 //Disabled because there's no known test case right now. 2300 //if (isZeroV128(e->Iex.Binop.arg1)) { 2301 // e2 = e->Iex.Binop.arg2; 2302 // break; 2303 //} 2304 } else { 2305 /* Xor8/16/32/64(0,t) ==> t */ 2306 if (isZeroU(e->Iex.Binop.arg1)) { 2307 e2 = e->Iex.Binop.arg2; 2308 break; 2309 } 2310 /* Xor8/16/32/64(t,0) ==> t */ 2311 if (isZeroU(e->Iex.Binop.arg2)) { 2312 e2 = e->Iex.Binop.arg1; 2313 break; 2314 } 2315 } 2316 break; 2317 2318 case Iop_CmpNE32: 2319 /* CmpNE32(t,t) ==> 0, for some IRTemp t */ 2320 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2321 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); 2322 break; 2323 } 2324 /* CmpNE32(1Uto32(b), 0) ==> b */ 2325 if (isZeroU32(e->Iex.Binop.arg2)) { 2326 IRExpr* a1 = chase(env, e->Iex.Binop.arg1); 2327 if (a1 && a1->tag == Iex_Unop 2328 && a1->Iex.Unop.op == Iop_1Uto32) { 2329 e2 = a1->Iex.Unop.arg; 2330 break; 2331 } 2332 } 2333 break; 2334 2335 case Iop_CmpEQ32: 2336 case Iop_CmpEQ64: 2337 case Iop_CmpEQ8x8: 2338 case Iop_CmpEQ8x16: 2339 case Iop_CmpEQ16x8: 2340 case Iop_CmpEQ32x4: 2341 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2342 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); 2343 break; 2344 } 2345 break; 2346 2347 default: 2348 break; 2349 } 2350 } 2351 break; 2352 2353 case Iex_ITE: 2354 /* ITE */ 2355 /* is the discriminant is a constant? */ 2356 if (e->Iex.ITE.cond->tag == Iex_Const) { 2357 /* assured us by the IR type rules */ 2358 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1); 2359 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1 2360 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse; 2361 } 2362 else 2363 /* are the arms identical? (pretty weedy test) */ 2364 if (sameIRExprs(env, e->Iex.ITE.iftrue, 2365 e->Iex.ITE.iffalse)) { 2366 e2 = e->Iex.ITE.iffalse; 2367 } 2368 break; 2369 2370 default: 2371 /* not considered */ 2372 break; 2373 } 2374 2375 /* Show cases where we've found but not folded 'op(t,t)'. Be 2376 careful not to call sameIRExprs with values of different types, 2377 though, else it will assert (and so it should!). We can't 2378 conveniently call typeOfIRExpr on the two args without a whole 2379 bunch of extra plumbing to pass in a type env, so just use a 2380 hacky test to check the arguments are not anything that might 2381 sameIRExprs to assert. This is only OK because this kludge is 2382 only used for debug printing, not for "real" operation. For 2383 "real" operation (ie, all other calls to sameIRExprs), it is 2384 essential that the to args have the same type. 2385 2386 The "right" solution is to plumb the containing block's 2387 IRTypeEnv through to here and use typeOfIRExpr to be sure. But 2388 that's a bunch of extra parameter passing which will just slow 2389 down the normal case, for no purpose. */ 2390 if (vex_control.iropt_verbosity > 0 2391 && e == e2 2392 && e->tag == Iex_Binop 2393 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1, 2394 e->Iex.Binop.arg2) 2395 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { 2396 vex_printf("vex iropt: fold_Expr: no ident rule for: "); 2397 ppIRExpr(e); 2398 vex_printf("\n"); 2399 } 2400 2401 /* Show the overall results of folding. */ 2402 if (DEBUG_IROPT && e2 != e) { 2403 vex_printf("FOLD: "); 2404 ppIRExpr(e); vex_printf(" -> "); 2405 ppIRExpr(e2); vex_printf("\n"); 2406 } 2407 2408 return e2; 2409 2410 unhandled: 2411 # if 0 2412 vex_printf("\n\n"); 2413 ppIRExpr(e); 2414 vpanic("fold_Expr: no rule for the above"); 2415 # else 2416 if (vex_control.iropt_verbosity > 0) { 2417 vex_printf("vex iropt: fold_Expr: no const rule for: "); 2418 ppIRExpr(e); 2419 vex_printf("\n"); 2420 } 2421 return e2; 2422 # endif 2423 } 2424 2425 2426 /* Apply the subst to a simple 1-level expression -- guaranteed to be 2427 1-level due to previous flattening pass. */ 2428 2429 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) 2430 { 2431 switch (ex->tag) { 2432 case Iex_RdTmp: 2433 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { 2434 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; 2435 if (rhs->tag == Iex_RdTmp) 2436 return rhs; 2437 if (rhs->tag == Iex_Const 2438 && rhs->Iex.Const.con->tag != Ico_F64i) 2439 return rhs; 2440 } 2441 /* not bound in env */ 2442 return ex; 2443 2444 case Iex_Const: 2445 case Iex_Get: 2446 return ex; 2447 2448 case Iex_GetI: 2449 vassert(isIRAtom(ex->Iex.GetI.ix)); 2450 return IRExpr_GetI( 2451 ex->Iex.GetI.descr, 2452 subst_Expr(env, ex->Iex.GetI.ix), 2453 ex->Iex.GetI.bias 2454 ); 2455 2456 case Iex_Qop: { 2457 IRQop* qop = ex->Iex.Qop.details; 2458 vassert(isIRAtom(qop->arg1)); 2459 vassert(isIRAtom(qop->arg2)); 2460 vassert(isIRAtom(qop->arg3)); 2461 vassert(isIRAtom(qop->arg4)); 2462 return IRExpr_Qop( 2463 qop->op, 2464 subst_Expr(env, qop->arg1), 2465 subst_Expr(env, qop->arg2), 2466 subst_Expr(env, qop->arg3), 2467 subst_Expr(env, qop->arg4) 2468 ); 2469 } 2470 2471 case Iex_Triop: { 2472 IRTriop* triop = ex->Iex.Triop.details; 2473 vassert(isIRAtom(triop->arg1)); 2474 vassert(isIRAtom(triop->arg2)); 2475 vassert(isIRAtom(triop->arg3)); 2476 return IRExpr_Triop( 2477 triop->op, 2478 subst_Expr(env, triop->arg1), 2479 subst_Expr(env, triop->arg2), 2480 subst_Expr(env, triop->arg3) 2481 ); 2482 } 2483 2484 case Iex_Binop: 2485 vassert(isIRAtom(ex->Iex.Binop.arg1)); 2486 vassert(isIRAtom(ex->Iex.Binop.arg2)); 2487 return IRExpr_Binop( 2488 ex->Iex.Binop.op, 2489 subst_Expr(env, ex->Iex.Binop.arg1), 2490 subst_Expr(env, ex->Iex.Binop.arg2) 2491 ); 2492 2493 case Iex_Unop: 2494 vassert(isIRAtom(ex->Iex.Unop.arg)); 2495 return IRExpr_Unop( 2496 ex->Iex.Unop.op, 2497 subst_Expr(env, ex->Iex.Unop.arg) 2498 ); 2499 2500 case Iex_Load: 2501 vassert(isIRAtom(ex->Iex.Load.addr)); 2502 return IRExpr_Load( 2503 ex->Iex.Load.end, 2504 ex->Iex.Load.ty, 2505 subst_Expr(env, ex->Iex.Load.addr) 2506 ); 2507 2508 case Iex_CCall: { 2509 Int i; 2510 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args); 2511 for (i = 0; args2[i]; i++) { 2512 vassert(isIRAtom(args2[i])); 2513 args2[i] = subst_Expr(env, args2[i]); 2514 } 2515 return IRExpr_CCall( 2516 ex->Iex.CCall.cee, 2517 ex->Iex.CCall.retty, 2518 args2 2519 ); 2520 } 2521 2522 case Iex_ITE: 2523 vassert(isIRAtom(ex->Iex.ITE.cond)); 2524 vassert(isIRAtom(ex->Iex.ITE.iftrue)); 2525 vassert(isIRAtom(ex->Iex.ITE.iffalse)); 2526 return IRExpr_ITE( 2527 subst_Expr(env, ex->Iex.ITE.cond), 2528 subst_Expr(env, ex->Iex.ITE.iftrue), 2529 subst_Expr(env, ex->Iex.ITE.iffalse) 2530 ); 2531 2532 default: 2533 vex_printf("\n\n"); ppIRExpr(ex); 2534 vpanic("subst_Expr"); 2535 2536 } 2537 } 2538 2539 2540 /* Apply the subst to stmt, then fold the result as much as possible. 2541 Much simplified due to stmt being previously flattened. As a 2542 result of this, the stmt may wind up being turned into a no-op. 2543 */ 2544 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) 2545 { 2546 # if 0 2547 vex_printf("\nsubst and fold stmt\n"); 2548 ppIRStmt(st); 2549 vex_printf("\n"); 2550 # endif 2551 2552 switch (st->tag) { 2553 case Ist_AbiHint: 2554 vassert(isIRAtom(st->Ist.AbiHint.base)); 2555 vassert(isIRAtom(st->Ist.AbiHint.nia)); 2556 return IRStmt_AbiHint( 2557 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)), 2558 st->Ist.AbiHint.len, 2559 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia)) 2560 ); 2561 case Ist_Put: 2562 vassert(isIRAtom(st->Ist.Put.data)); 2563 return IRStmt_Put( 2564 st->Ist.Put.offset, 2565 fold_Expr(env, subst_Expr(env, st->Ist.Put.data)) 2566 ); 2567 2568 case Ist_PutI: { 2569 IRPutI *puti, *puti2; 2570 puti = st->Ist.PutI.details; 2571 vassert(isIRAtom(puti->ix)); 2572 vassert(isIRAtom(puti->data)); 2573 puti2 = mkIRPutI(puti->descr, 2574 fold_Expr(env, subst_Expr(env, puti->ix)), 2575 puti->bias, 2576 fold_Expr(env, subst_Expr(env, puti->data))); 2577 return IRStmt_PutI(puti2); 2578 } 2579 2580 case Ist_WrTmp: 2581 /* This is the one place where an expr (st->Ist.WrTmp.data) is 2582 allowed to be more than just a constant or a tmp. */ 2583 return IRStmt_WrTmp( 2584 st->Ist.WrTmp.tmp, 2585 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data)) 2586 ); 2587 2588 case Ist_Store: 2589 vassert(isIRAtom(st->Ist.Store.addr)); 2590 vassert(isIRAtom(st->Ist.Store.data)); 2591 return IRStmt_Store( 2592 st->Ist.Store.end, 2593 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)), 2594 fold_Expr(env, subst_Expr(env, st->Ist.Store.data)) 2595 ); 2596 2597 case Ist_StoreG: { 2598 IRStoreG* sg = st->Ist.StoreG.details; 2599 vassert(isIRAtom(sg->addr)); 2600 vassert(isIRAtom(sg->data)); 2601 vassert(isIRAtom(sg->guard)); 2602 IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr)); 2603 IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data)); 2604 IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard)); 2605 if (fguard->tag == Iex_Const) { 2606 /* The condition on this store has folded down to a constant. */ 2607 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2608 if (fguard->Iex.Const.con->Ico.U1 == False) { 2609 return IRStmt_NoOp(); 2610 } else { 2611 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2612 return IRStmt_Store(sg->end, faddr, fdata); 2613 } 2614 } 2615 return IRStmt_StoreG(sg->end, faddr, fdata, fguard); 2616 } 2617 2618 case Ist_LoadG: { 2619 /* This is complicated. If the guard folds down to 'false', 2620 we can replace it with an assignment 'dst := alt', but if 2621 the guard folds down to 'true', we can't conveniently 2622 replace it with an unconditional load, because doing so 2623 requires generating a new temporary, and that is not easy 2624 to do at this point. */ 2625 IRLoadG* lg = st->Ist.LoadG.details; 2626 vassert(isIRAtom(lg->addr)); 2627 vassert(isIRAtom(lg->alt)); 2628 vassert(isIRAtom(lg->guard)); 2629 IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr)); 2630 IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt)); 2631 IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard)); 2632 if (fguard->tag == Iex_Const) { 2633 /* The condition on this load has folded down to a constant. */ 2634 vassert(fguard->Iex.Const.con->tag == Ico_U1); 2635 if (fguard->Iex.Const.con->Ico.U1 == False) { 2636 /* The load is not going to happen -- instead 'alt' is 2637 assigned to 'dst'. */ 2638 return IRStmt_WrTmp(lg->dst, falt); 2639 } else { 2640 vassert(fguard->Iex.Const.con->Ico.U1 == True); 2641 /* The load is always going to happen. We want to 2642 convert to an unconditional load and assign to 'dst' 2643 (IRStmt_WrTmp). Problem is we need an extra temp to 2644 hold the loaded value, but none is available. 2645 Instead, reconstitute the conditional load (with 2646 folded args, of course) and let the caller of this 2647 routine deal with the problem. */ 2648 } 2649 } 2650 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard); 2651 } 2652 2653 case Ist_CAS: { 2654 IRCAS *cas, *cas2; 2655 cas = st->Ist.CAS.details; 2656 vassert(isIRAtom(cas->addr)); 2657 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 2658 vassert(isIRAtom(cas->expdLo)); 2659 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 2660 vassert(isIRAtom(cas->dataLo)); 2661 cas2 = mkIRCAS( 2662 cas->oldHi, cas->oldLo, cas->end, 2663 fold_Expr(env, subst_Expr(env, cas->addr)), 2664 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) 2665 : NULL, 2666 fold_Expr(env, subst_Expr(env, cas->expdLo)), 2667 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) 2668 : NULL, 2669 fold_Expr(env, subst_Expr(env, cas->dataLo)) 2670 ); 2671 return IRStmt_CAS(cas2); 2672 } 2673 2674 case Ist_LLSC: 2675 vassert(isIRAtom(st->Ist.LLSC.addr)); 2676 if (st->Ist.LLSC.storedata) 2677 vassert(isIRAtom(st->Ist.LLSC.storedata)); 2678 return IRStmt_LLSC( 2679 st->Ist.LLSC.end, 2680 st->Ist.LLSC.result, 2681 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)), 2682 st->Ist.LLSC.storedata 2683 ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata)) 2684 : NULL 2685 ); 2686 2687 case Ist_Dirty: { 2688 Int i; 2689 IRDirty *d, *d2; 2690 d = st->Ist.Dirty.details; 2691 d2 = emptyIRDirty(); 2692 *d2 = *d; 2693 d2->args = shallowCopyIRExprVec(d2->args); 2694 if (d2->mFx != Ifx_None) { 2695 vassert(isIRAtom(d2->mAddr)); 2696 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr)); 2697 } 2698 vassert(isIRAtom(d2->guard)); 2699 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard)); 2700 for (i = 0; d2->args[i]; i++) { 2701 IRExpr* arg = d2->args[i]; 2702 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) { 2703 vassert(isIRAtom(arg)); 2704 d2->args[i] = fold_Expr(env, subst_Expr(env, arg)); 2705 } 2706 } 2707 return IRStmt_Dirty(d2); 2708 } 2709 2710 case Ist_IMark: 2711 return IRStmt_IMark(st->Ist.IMark.addr, 2712 st->Ist.IMark.len, 2713 st->Ist.IMark.delta); 2714 2715 case Ist_NoOp: 2716 return IRStmt_NoOp(); 2717 2718 case Ist_MBE: 2719 return IRStmt_MBE(st->Ist.MBE.event); 2720 2721 case Ist_Exit: { 2722 IRExpr* fcond; 2723 vassert(isIRAtom(st->Ist.Exit.guard)); 2724 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard)); 2725 if (fcond->tag == Iex_Const) { 2726 /* Interesting. The condition on this exit has folded down to 2727 a constant. */ 2728 vassert(fcond->Iex.Const.con->tag == Ico_U1); 2729 if (fcond->Iex.Const.con->Ico.U1 == False) { 2730 /* exit is never going to happen, so dump the statement. */ 2731 return IRStmt_NoOp(); 2732 } else { 2733 vassert(fcond->Iex.Const.con->Ico.U1 == True); 2734 /* Hmmm. The exit has become unconditional. Leave it 2735 as it is for now, since we'd have to truncate the BB 2736 at this point, which is tricky. Such truncation is 2737 done later by the dead-code elimination pass. */ 2738 /* fall out into the reconstruct-the-exit code. */ 2739 if (vex_control.iropt_verbosity > 0) 2740 /* really a misuse of vex_control.iropt_verbosity */ 2741 vex_printf("vex iropt: IRStmt_Exit became unconditional\n"); 2742 } 2743 } 2744 return IRStmt_Exit(fcond, st->Ist.Exit.jk, 2745 st->Ist.Exit.dst, st->Ist.Exit.offsIP); 2746 } 2747 2748 default: 2749 vex_printf("\n"); ppIRStmt(st); 2750 vpanic("subst_and_fold_Stmt"); 2751 } 2752 } 2753 2754 2755 IRSB* cprop_BB ( IRSB* in ) 2756 { 2757 Int i; 2758 IRSB* out; 2759 IRStmt* st2; 2760 Int n_tmps = in->tyenv->types_used; 2761 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*)); 2762 /* Keep track of IRStmt_LoadGs that we need to revisit after 2763 processing all the other statements. */ 2764 const Int N_FIXUPS = 16; 2765 Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */ 2766 Int n_fixups = 0; 2767 2768 out = emptyIRSB(); 2769 out->tyenv = deepCopyIRTypeEnv( in->tyenv ); 2770 2771 /* Set up the env with which travels forward. This holds a 2772 substitution, mapping IRTemps to IRExprs. The environment 2773 is to be applied as we move along. Keys are IRTemps. 2774 Values are IRExpr*s. 2775 */ 2776 for (i = 0; i < n_tmps; i++) 2777 env[i] = NULL; 2778 2779 /* For each original SSA-form stmt ... */ 2780 for (i = 0; i < in->stmts_used; i++) { 2781 2782 /* First apply the substitution to the current stmt. This 2783 propagates in any constants and tmp-tmp assignments 2784 accumulated prior to this point. As part of the subst_Stmt 2785 call, also then fold any constant expressions resulting. */ 2786 2787 st2 = in->stmts[i]; 2788 2789 /* perhaps st2 is already a no-op? */ 2790 if (st2->tag == Ist_NoOp) continue; 2791 2792 st2 = subst_and_fold_Stmt( env, st2 ); 2793 2794 /* Deal with some post-folding special cases. */ 2795 switch (st2->tag) { 2796 2797 /* If the statement has been folded into a no-op, forget 2798 it. */ 2799 case Ist_NoOp: 2800 continue; 2801 2802 /* If the statement assigns to an IRTemp add it to the 2803 running environment. This is for the benefit of copy 2804 propagation and to allow sameIRExpr look through 2805 IRTemps. */ 2806 case Ist_WrTmp: { 2807 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL); 2808 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; 2809 2810 /* 't1 = t2' -- don't add to BB; will be optimized out */ 2811 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) 2812 continue; 2813 2814 /* 't = const' && 'const != F64i' -- don't add to BB 2815 Note, we choose not to propagate const when const is an 2816 F64i, so that F64i literals can be CSE'd later. This 2817 helps x86 floating point code generation. */ 2818 if (st2->Ist.WrTmp.data->tag == Iex_Const 2819 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) { 2820 continue; 2821 } 2822 /* else add it to the output, as normal */ 2823 break; 2824 } 2825 2826 case Ist_LoadG: { 2827 IRLoadG* lg = st2->Ist.LoadG.details; 2828 IRExpr* guard = lg->guard; 2829 if (guard->tag == Iex_Const) { 2830 /* The guard has folded to a constant, and that 2831 constant must be 1:I1, since subst_and_fold_Stmt 2832 folds out the case 0:I1 by itself. */ 2833 vassert(guard->Iex.Const.con->tag == Ico_U1); 2834 vassert(guard->Iex.Const.con->Ico.U1 == True); 2835 /* Add a NoOp here as a placeholder, and make a note of 2836 where it is in the output block. Afterwards we'll 2837 come back here and transform the NoOp and the LoadG 2838 into a load-convert pair. The fixups[] entry 2839 refers to the inserted NoOp, and we expect to find 2840 the relevant LoadG immediately after it. */ 2841 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); 2842 if (n_fixups < N_FIXUPS) { 2843 fixups[n_fixups++] = out->stmts_used; 2844 addStmtToIRSB( out, IRStmt_NoOp() ); 2845 } 2846 } 2847 /* And always add the LoadG to the output, regardless. */ 2848 break; 2849 } 2850 2851 default: 2852 break; 2853 } 2854 2855 /* Not interesting, copy st2 into the output block. */ 2856 addStmtToIRSB( out, st2 ); 2857 } 2858 2859 # if STATS_IROPT 2860 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n", 2861 invocation_count, recursion_count, success_count, 2862 recursion_success_count, max_nodes_visited); 2863 # endif 2864 2865 out->next = subst_Expr( env, in->next ); 2866 out->jumpkind = in->jumpkind; 2867 out->offsIP = in->offsIP; 2868 2869 /* Process any leftover unconditional LoadGs that we noticed 2870 in the main pass. */ 2871 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); 2872 for (i = 0; i < n_fixups; i++) { 2873 Int ix = fixups[i]; 2874 /* Carefully verify that the LoadG has the expected form. */ 2875 vassert(ix >= 0 && ix+1 < out->stmts_used); 2876 IRStmt* nop = out->stmts[ix]; 2877 IRStmt* lgu = out->stmts[ix+1]; 2878 vassert(nop->tag == Ist_NoOp); 2879 vassert(lgu->tag == Ist_LoadG); 2880 IRLoadG* lg = lgu->Ist.LoadG.details; 2881 IRExpr* guard = lg->guard; 2882 vassert(guard->Iex.Const.con->tag == Ico_U1); 2883 vassert(guard->Iex.Const.con->Ico.U1 == True); 2884 /* Figure out the load and result types, and the implied 2885 conversion operation. */ 2886 IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID; 2887 typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg); 2888 IROp cvtOp = Iop_INVALID; 2889 switch (lg->cvt) { 2890 case ILGop_Ident32: break; 2891 case ILGop_8Uto32: cvtOp = Iop_8Uto32; break; 2892 case ILGop_8Sto32: cvtOp = Iop_8Sto32; break; 2893 case ILGop_16Uto32: cvtOp = Iop_16Uto32; break; 2894 case ILGop_16Sto32: cvtOp = Iop_16Sto32; break; 2895 default: vpanic("cprop_BB: unhandled ILGOp"); 2896 } 2897 /* Replace the placeholder NoOp by the required unconditional 2898 load. */ 2899 IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg); 2900 out->stmts[ix] 2901 = IRStmt_WrTmp(tLoaded, 2902 IRExpr_Load(lg->end, cvtArg, lg->addr)); 2903 /* Replace the LoadG by a conversion from the loaded value's 2904 type to the required result type. */ 2905 out->stmts[ix+1] 2906 = IRStmt_WrTmp( 2907 lg->dst, cvtOp == Iop_INVALID 2908 ? IRExpr_RdTmp(tLoaded) 2909 : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded))); 2910 } 2911 2912 return out; 2913 } 2914 2915 2916 /*---------------------------------------------------------------*/ 2917 /*--- Dead code (t = E) removal ---*/ 2918 /*---------------------------------------------------------------*/ 2919 2920 /* As a side effect, also removes all code following an unconditional 2921 side exit. */ 2922 2923 /* The type of the HashHW map is: a map from IRTemp to nothing 2924 -- really just operating a set or IRTemps. 2925 */ 2926 2927 inline 2928 static void addUses_Temp ( Bool* set, IRTemp tmp ) 2929 { 2930 set[(Int)tmp] = True; 2931 } 2932 2933 static void addUses_Expr ( Bool* set, IRExpr* e ) 2934 { 2935 Int i; 2936 switch (e->tag) { 2937 case Iex_GetI: 2938 addUses_Expr(set, e->Iex.GetI.ix); 2939 return; 2940 case Iex_ITE: 2941 addUses_Expr(set, e->Iex.ITE.cond); 2942 addUses_Expr(set, e->Iex.ITE.iftrue); 2943 addUses_Expr(set, e->Iex.ITE.iffalse); 2944 return; 2945 case Iex_CCall: 2946 for (i = 0; e->Iex.CCall.args[i]; i++) 2947 addUses_Expr(set, e->Iex.CCall.args[i]); 2948 return; 2949 case Iex_Load: 2950 addUses_Expr(set, e->Iex.Load.addr); 2951 return; 2952 case Iex_Qop: 2953 addUses_Expr(set, e->Iex.Qop.details->arg1); 2954 addUses_Expr(set, e->Iex.Qop.details->arg2); 2955 addUses_Expr(set, e->Iex.Qop.details->arg3); 2956 addUses_Expr(set, e->Iex.Qop.details->arg4); 2957 return; 2958 case Iex_Triop: 2959 addUses_Expr(set, e->Iex.Triop.details->arg1); 2960 addUses_Expr(set, e->Iex.Triop.details->arg2); 2961 addUses_Expr(set, e->Iex.Triop.details->arg3); 2962 return; 2963 case Iex_Binop: 2964 addUses_Expr(set, e->Iex.Binop.arg1); 2965 addUses_Expr(set, e->Iex.Binop.arg2); 2966 return; 2967 case Iex_Unop: 2968 addUses_Expr(set, e->Iex.Unop.arg); 2969 return; 2970 case Iex_RdTmp: 2971 addUses_Temp(set, e->Iex.RdTmp.tmp); 2972 return; 2973 case Iex_Const: 2974 case Iex_Get: 2975 return; 2976 default: 2977 vex_printf("\n"); 2978 ppIRExpr(e); 2979 vpanic("addUses_Expr"); 2980 } 2981 } 2982 2983 static void addUses_Stmt ( Bool* set, IRStmt* st ) 2984 { 2985 Int i; 2986 IRDirty* d; 2987 IRCAS* cas; 2988 switch (st->tag) { 2989 case Ist_AbiHint: 2990 addUses_Expr(set, st->Ist.AbiHint.base); 2991 addUses_Expr(set, st->Ist.AbiHint.nia); 2992 return; 2993 case Ist_PutI: 2994 addUses_Expr(set, st->Ist.PutI.details->ix); 2995 addUses_Expr(set, st->Ist.PutI.details->data); 2996 return; 2997 case Ist_WrTmp: 2998 addUses_Expr(set, st->Ist.WrTmp.data); 2999 return; 3000 case Ist_Put: 3001 addUses_Expr(set, st->Ist.Put.data); 3002 return; 3003 case Ist_Store: 3004 addUses_Expr(set, st->Ist.Store.addr); 3005 addUses_Expr(set, st->Ist.Store.data); 3006 return; 3007 case Ist_StoreG: { 3008 IRStoreG* sg = st->Ist.StoreG.details; 3009 addUses_Expr(set, sg->addr); 3010 addUses_Expr(set, sg->data); 3011 addUses_Expr(set, sg->guard); 3012 return; 3013 } 3014 case Ist_LoadG: { 3015 IRLoadG* lg = st->Ist.LoadG.details; 3016 addUses_Expr(set, lg->addr); 3017 addUses_Expr(set, lg->alt); 3018 addUses_Expr(set, lg->guard); 3019 return; 3020 } 3021 case Ist_CAS: 3022 cas = st->Ist.CAS.details; 3023 addUses_Expr(set, cas->addr); 3024 if (cas->expdHi) 3025 addUses_Expr(set, cas->expdHi); 3026 addUses_Expr(set, cas->expdLo); 3027 if (cas->dataHi) 3028 addUses_Expr(set, cas->dataHi); 3029 addUses_Expr(set, cas->dataLo); 3030 return; 3031 case Ist_LLSC: 3032 addUses_Expr(set, st->Ist.LLSC.addr); 3033 if (st->Ist.LLSC.storedata) 3034 addUses_Expr(set, st->Ist.LLSC.storedata); 3035 return; 3036 case Ist_Dirty: 3037 d = st->Ist.Dirty.details; 3038 if (d->mFx != Ifx_None) 3039 addUses_Expr(set, d->mAddr); 3040 addUses_Expr(set, d->guard); 3041 for (i = 0; d->args[i] != NULL; i++) { 3042 IRExpr* arg = d->args[i]; 3043 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 3044 addUses_Expr(set, arg); 3045 } 3046 return; 3047 case Ist_NoOp: 3048 case Ist_IMark: 3049 case Ist_MBE: 3050 return; 3051 case Ist_Exit: 3052 addUses_Expr(set, st->Ist.Exit.guard); 3053 return; 3054 default: 3055 vex_printf("\n"); 3056 ppIRStmt(st); 3057 vpanic("addUses_Stmt"); 3058 } 3059 } 3060 3061 3062 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */ 3063 static Bool isZeroU1 ( IRExpr* e ) 3064 { 3065 return toBool( e->tag == Iex_Const 3066 && e->Iex.Const.con->tag == Ico_U1 3067 && e->Iex.Const.con->Ico.U1 == False ); 3068 } 3069 3070 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */ 3071 static Bool isOneU1 ( IRExpr* e ) 3072 { 3073 return toBool( e->tag == Iex_Const 3074 && e->Iex.Const.con->tag == Ico_U1 3075 && e->Iex.Const.con->Ico.U1 == True ); 3076 } 3077 3078 3079 /* Note, this destructively modifies the given IRSB. */ 3080 3081 /* Scan backwards through statements, carrying a set of IRTemps which 3082 are known to be used after the current point. On encountering 't = 3083 E', delete the binding if it is not used. Otherwise, add any temp 3084 uses to the set and keep on moving backwards. 3085 3086 As an enhancement, the first (backwards) pass searches for IR exits 3087 with always-taken conditions and notes the location of the earliest 3088 one in the block. If any such are found, a second pass copies the 3089 exit destination and jump kind to the bb-end. Then, the exit and 3090 all statements following it are turned into no-ops. 3091 */ 3092 3093 /* notstatic */ void do_deadcode_BB ( IRSB* bb ) 3094 { 3095 Int i, i_unconditional_exit; 3096 Int n_tmps = bb->tyenv->types_used; 3097 Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool)); 3098 IRStmt* st; 3099 3100 for (i = 0; i < n_tmps; i++) 3101 set[i] = False; 3102 3103 /* start off by recording IRTemp uses in the next field. */ 3104 addUses_Expr(set, bb->next); 3105 3106 /* First pass */ 3107 3108 /* Work backwards through the stmts */ 3109 i_unconditional_exit = -1; 3110 for (i = bb->stmts_used-1; i >= 0; i--) { 3111 st = bb->stmts[i]; 3112 if (st->tag == Ist_NoOp) 3113 continue; 3114 /* take note of any unconditional exits */ 3115 if (st->tag == Ist_Exit 3116 && isOneU1(st->Ist.Exit.guard)) 3117 i_unconditional_exit = i; 3118 if (st->tag == Ist_WrTmp 3119 && set[(Int)(st->Ist.WrTmp.tmp)] == False) { 3120 /* it's an IRTemp which never got used. Delete it. */ 3121 if (DEBUG_IROPT) { 3122 vex_printf("DEAD: "); 3123 ppIRStmt(st); 3124 vex_printf("\n"); 3125 } 3126 bb->stmts[i] = IRStmt_NoOp(); 3127 } 3128 else 3129 if (st->tag == Ist_Dirty 3130 && st->Ist.Dirty.details->guard 3131 && isZeroU1(st->Ist.Dirty.details->guard)) { 3132 /* This is a dirty helper which will never get called. 3133 Delete it. */ 3134 bb->stmts[i] = IRStmt_NoOp(); 3135 } 3136 else { 3137 /* Note any IRTemp uses made by the current statement. */ 3138 addUses_Stmt(set, st); 3139 } 3140 } 3141 3142 /* Optional second pass: if any unconditional exits were found, 3143 delete them and all following statements. */ 3144 3145 if (i_unconditional_exit != -1) { 3146 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", 3147 i_unconditional_exit); 3148 vassert(i_unconditional_exit >= 0 3149 && i_unconditional_exit < bb->stmts_used); 3150 bb->next 3151 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); 3152 bb->jumpkind 3153 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; 3154 bb->offsIP 3155 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; 3156 for (i = i_unconditional_exit; i < bb->stmts_used; i++) 3157 bb->stmts[i] = IRStmt_NoOp(); 3158 } 3159 } 3160 3161 3162 /*---------------------------------------------------------------*/ 3163 /*--- Specialisation of helper function calls, in ---*/ 3164 /*--- collaboration with the front end ---*/ 3165 /*---------------------------------------------------------------*/ 3166 3167 static 3168 IRSB* spec_helpers_BB( 3169 IRSB* bb, 3170 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int) 3171 ) 3172 { 3173 Int i; 3174 IRStmt* st; 3175 IRExpr* ex; 3176 Bool any = False; 3177 3178 for (i = bb->stmts_used-1; i >= 0; i--) { 3179 st = bb->stmts[i]; 3180 3181 if (st->tag != Ist_WrTmp 3182 || st->Ist.WrTmp.data->tag != Iex_CCall) 3183 continue; 3184 3185 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, 3186 st->Ist.WrTmp.data->Iex.CCall.args, 3187 &bb->stmts[0], i ); 3188 if (!ex) 3189 /* the front end can't think of a suitable replacement */ 3190 continue; 3191 3192 /* We got something better. Install it in the bb. */ 3193 any = True; 3194 bb->stmts[i] 3195 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); 3196 3197 if (0) { 3198 vex_printf("SPEC: "); 3199 ppIRExpr(st->Ist.WrTmp.data); 3200 vex_printf(" --> "); 3201 ppIRExpr(ex); 3202 vex_printf("\n"); 3203 } 3204 } 3205 3206 if (any) 3207 bb = flatten_BB(bb); 3208 return bb; 3209 } 3210 3211 3212 /*---------------------------------------------------------------*/ 3213 /*--- Determination of guest state aliasing relationships ---*/ 3214 /*---------------------------------------------------------------*/ 3215 3216 /* These are helper functions for CSE and GetI/PutI transformations. 3217 3218 Determine, to the extent possible, the relationship between two 3219 guest state accesses. The possible outcomes are: 3220 3221 * Exact alias. These two accesses denote precisely the same 3222 piece of the guest state. 3223 3224 * Definitely no alias. These two accesses are guaranteed not to 3225 overlap any part of the guest state. 3226 3227 * Unknown -- if neither of the above can be established. 3228 3229 If in doubt, return Unknown. */ 3230 3231 typedef 3232 enum { ExactAlias, NoAlias, UnknownAlias } 3233 GSAliasing; 3234 3235 3236 /* Produces the alias relation between an indexed guest 3237 state access and a non-indexed access. */ 3238 3239 static 3240 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1, 3241 Int offset2, IRType ty2 ) 3242 { 3243 UInt minoff1, maxoff1, minoff2, maxoff2; 3244 3245 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3246 minoff2 = offset2; 3247 maxoff2 = minoff2 + sizeofIRType(ty2) - 1; 3248 3249 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3250 return NoAlias; 3251 3252 /* Could probably do better here if required. For the moment 3253 however just claim not to know anything more. */ 3254 return UnknownAlias; 3255 } 3256 3257 3258 /* Produces the alias relation between two indexed guest state 3259 accesses. */ 3260 3261 static 3262 GSAliasing getAliasingRelation_II ( 3263 IRRegArray* descr1, IRExpr* ix1, Int bias1, 3264 IRRegArray* descr2, IRExpr* ix2, Int bias2 3265 ) 3266 { 3267 UInt minoff1, maxoff1, minoff2, maxoff2; 3268 Int iters; 3269 3270 /* First try hard to show they don't alias. */ 3271 getArrayBounds( descr1, &minoff1, &maxoff1 ); 3272 getArrayBounds( descr2, &minoff2, &maxoff2 ); 3273 if (maxoff1 < minoff2 || maxoff2 < minoff1) 3274 return NoAlias; 3275 3276 /* So the two arrays at least partially overlap. To get any 3277 further we'll have to be sure that the descriptors are 3278 identical. */ 3279 if (!eqIRRegArray(descr1, descr2)) 3280 return UnknownAlias; 3281 3282 /* The descriptors are identical. Now the only difference can be 3283 in the index expressions. If they cannot be shown to be 3284 identical, we have to say we don't know what the aliasing 3285 relation will be. Now, since the IR is flattened, the index 3286 expressions should be atoms -- either consts or tmps. So that 3287 makes the comparison simple. */ 3288 vassert(isIRAtom(ix1)); 3289 vassert(isIRAtom(ix2)); 3290 if (!eqIRAtom(ix1,ix2)) 3291 return UnknownAlias; 3292 3293 /* Ok, the index expressions are identical. So now the only way 3294 they can be different is in the bias. Normalise this 3295 paranoidly, to reliably establish equality/non-equality. */ 3296 3297 /* So now we know that the GetI and PutI index the same array 3298 with the same base. Are the offsets the same, modulo the 3299 array size? Do this paranoidly. */ 3300 vassert(descr1->nElems == descr2->nElems); 3301 vassert(descr1->elemTy == descr2->elemTy); 3302 vassert(descr1->base == descr2->base); 3303 iters = 0; 3304 while (bias1 < 0 || bias2 < 0) { 3305 bias1 += descr1->nElems; 3306 bias2 += descr1->nElems; 3307 iters++; 3308 if (iters > 10) 3309 vpanic("getAliasingRelation: iters"); 3310 } 3311 vassert(bias1 >= 0 && bias2 >= 0); 3312 bias1 %= descr1->nElems; 3313 bias2 %= descr1->nElems; 3314 vassert(bias1 >= 0 && bias1 < descr1->nElems); 3315 vassert(bias2 >= 0 && bias2 < descr1->nElems); 3316 3317 /* Finally, biasP and biasG are normalised into the range 3318 0 .. descrP/G->nElems - 1. And so we can establish 3319 equality/non-equality. */ 3320 3321 return bias1==bias2 ? ExactAlias : NoAlias; 3322 } 3323 3324 3325 /*---------------------------------------------------------------*/ 3326 /*--- Common Subexpression Elimination ---*/ 3327 /*---------------------------------------------------------------*/ 3328 3329 /* Expensive in time and space. */ 3330 3331 /* Uses two environments: 3332 a IRTemp -> IRTemp mapping 3333 a mapping from AvailExpr* to IRTemp 3334 */ 3335 3336 typedef 3337 struct { 3338 enum { TCc, TCt } tag; 3339 union { IRTemp tmp; IRConst* con; } u; 3340 } 3341 TmpOrConst; 3342 3343 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 ) 3344 { 3345 if (tc1->tag != tc2->tag) 3346 return False; 3347 switch (tc1->tag) { 3348 case TCc: 3349 return eqIRConst(tc1->u.con, tc2->u.con); 3350 case TCt: 3351 return tc1->u.tmp == tc2->u.tmp; 3352 default: 3353 vpanic("eqTmpOrConst"); 3354 } 3355 } 3356 3357 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 ) 3358 { 3359 Bool eq = cee1->addr == cee2->addr; 3360 if (eq) { 3361 vassert(cee1->regparms == cee2->regparms); 3362 vassert(cee1->mcx_mask == cee2->mcx_mask); 3363 /* Names should be the same too, but we don't bother to 3364 check. */ 3365 } 3366 return eq; 3367 } 3368 3369 /* Convert an atomic IRExpr* to a TmpOrConst. */ 3370 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e ) 3371 { 3372 switch (e->tag) { 3373 case Iex_RdTmp: 3374 tc->tag = TCt; 3375 tc->u.tmp = e->Iex.RdTmp.tmp; 3376 break; 3377 case Iex_Const: 3378 tc->tag = TCc; 3379 tc->u.con = e->Iex.Const.con; 3380 break; 3381 default: 3382 /* Getting here is a serious error. It means that the 3383 presented arg isn't an IR atom, as it should be. */ 3384 vpanic("irExpr_to_TmpOrConst"); 3385 } 3386 } 3387 3388 /* Convert a TmpOrConst to an atomic IRExpr*. */ 3389 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc ) 3390 { 3391 switch (tc->tag) { 3392 case TCc: return IRExpr_Const(tc->u.con); 3393 case TCt: return IRExpr_RdTmp(tc->u.tmp); 3394 default: vpanic("tmpOrConst_to_IRExpr"); 3395 } 3396 } 3397 3398 /* Convert a NULL terminated IRExpr* vector to an array of 3399 TmpOrConsts, and a length. */ 3400 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs, 3401 /*OUT*/Int* nOuts, 3402 IRExpr** ins ) 3403 { 3404 Int i, n; 3405 /* We have to make two passes, one to count, one to copy. */ 3406 for (n = 0; ins[n]; n++) 3407 ; 3408 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst)); 3409 *nOuts = n; 3410 /* and now copy .. */ 3411 for (i = 0; i < n; i++) { 3412 IRExpr* arg = ins[i]; 3413 TmpOrConst* dst = &(*outs)[i]; 3414 irExpr_to_TmpOrConst(dst, arg); 3415 } 3416 } 3417 3418 typedef 3419 struct { 3420 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt, 3421 CCall, Load 3422 } tag; 3423 union { 3424 /* unop(tmp) */ 3425 struct { 3426 IROp op; 3427 IRTemp arg; 3428 } Ut; 3429 /* binop(tmp,tmp) */ 3430 struct { 3431 IROp op; 3432 IRTemp arg1; 3433 IRTemp arg2; 3434 } Btt; 3435 /* binop(tmp,const) */ 3436 struct { 3437 IROp op; 3438 IRTemp arg1; 3439 IRConst con2; 3440 } Btc; 3441 /* binop(const,tmp) */ 3442 struct { 3443 IROp op; 3444 IRConst con1; 3445 IRTemp arg2; 3446 } Bct; 3447 /* F64i-style const */ 3448 struct { 3449 ULong f64i; 3450 } Cf64i; 3451 /* ITE(tmp,tmp,tmp) */ 3452 struct { 3453 IRTemp co; 3454 IRTemp e1; 3455 IRTemp e0; 3456 } Ittt; 3457 /* ITE(tmp,tmp,const) */ 3458 struct { 3459 IRTemp co; 3460 IRTemp e1; 3461 IRConst con0; 3462 } Ittc; 3463 /* ITE(tmp,const,tmp) */ 3464 struct { 3465 IRTemp co; 3466 IRConst con1; 3467 IRTemp e0; 3468 } Itct; 3469 /* ITE(tmp,const,const) */ 3470 struct { 3471 IRTemp co; 3472 IRConst con1; 3473 IRConst con0; 3474 } Itcc; 3475 /* GetI(descr,tmp,bias)*/ 3476 struct { 3477 IRRegArray* descr; 3478 IRTemp ix; 3479 Int bias; 3480 } GetIt; 3481 /* Clean helper call */ 3482 struct { 3483 IRCallee* cee; 3484 TmpOrConst* args; 3485 Int nArgs; 3486 IRType retty; 3487 } CCall; 3488 /* Load(end,ty,addr) */ 3489 struct { 3490 IREndness end; 3491 IRType ty; 3492 TmpOrConst addr; 3493 } Load; 3494 } u; 3495 } 3496 AvailExpr; 3497 3498 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 ) 3499 { 3500 if (LIKELY(a1->tag != a2->tag)) 3501 return False; 3502 switch (a1->tag) { 3503 case Ut: 3504 return toBool( 3505 a1->u.Ut.op == a2->u.Ut.op 3506 && a1->u.Ut.arg == a2->u.Ut.arg); 3507 case Btt: 3508 return toBool( 3509 a1->u.Btt.op == a2->u.Btt.op 3510 && a1->u.Btt.arg1 == a2->u.Btt.arg1 3511 && a1->u.Btt.arg2 == a2->u.Btt.arg2); 3512 case Btc: 3513 return toBool( 3514 a1->u.Btc.op == a2->u.Btc.op 3515 && a1->u.Btc.arg1 == a2->u.Btc.arg1 3516 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2)); 3517 case Bct: 3518 return toBool( 3519 a1->u.Bct.op == a2->u.Bct.op 3520 && a1->u.Bct.arg2 == a2->u.Bct.arg2 3521 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1)); 3522 case Cf64i: 3523 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i); 3524 case Ittt: 3525 return toBool(a1->u.Ittt.co == a2->u.Ittt.co 3526 && a1->u.Ittt.e1 == a2->u.Ittt.e1 3527 && a1->u.Ittt.e0 == a2->u.Ittt.e0); 3528 case Ittc: 3529 return toBool(a1->u.Ittc.co == a2->u.Ittc.co 3530 && a1->u.Ittc.e1 == a2->u.Ittc.e1 3531 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0)); 3532 case Itct: 3533 return toBool(a1->u.Itct.co == a2->u.Itct.co 3534 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1) 3535 && a1->u.Itct.e0 == a2->u.Itct.e0); 3536 case Itcc: 3537 return toBool(a1->u.Itcc.co == a2->u.Itcc.co 3538 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1) 3539 && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0)); 3540 case GetIt: 3541 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr) 3542 && a1->u.GetIt.ix == a2->u.GetIt.ix 3543 && a1->u.GetIt.bias == a2->u.GetIt.bias); 3544 case CCall: { 3545 Int i, n; 3546 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs 3547 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee); 3548 if (eq) { 3549 n = a1->u.CCall.nArgs; 3550 for (i = 0; i < n; i++) { 3551 if (!eqTmpOrConst( &a1->u.CCall.args[i], 3552 &a2->u.CCall.args[i] )) { 3553 eq = False; 3554 break; 3555 } 3556 } 3557 } 3558 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty); 3559 return eq; 3560 } 3561 case Load: { 3562 Bool eq = toBool(a1->u.Load.end == a2->u.Load.end 3563 && a1->u.Load.ty == a2->u.Load.ty 3564 && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr)); 3565 return eq; 3566 } 3567 default: 3568 vpanic("eq_AvailExpr"); 3569 } 3570 } 3571 3572 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae ) 3573 { 3574 IRConst *con, *con0, *con1; 3575 switch (ae->tag) { 3576 case Ut: 3577 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) ); 3578 case Btt: 3579 return IRExpr_Binop( ae->u.Btt.op, 3580 IRExpr_RdTmp(ae->u.Btt.arg1), 3581 IRExpr_RdTmp(ae->u.Btt.arg2) ); 3582 case Btc: 3583 con = LibVEX_Alloc_inline(sizeof(IRConst)); 3584 *con = ae->u.Btc.con2; 3585 return IRExpr_Binop( ae->u.Btc.op, 3586 IRExpr_RdTmp(ae->u.Btc.arg1), 3587 IRExpr_Const(con) ); 3588 case Bct: 3589 con = LibVEX_Alloc_inline(sizeof(IRConst)); 3590 *con = ae->u.Bct.con1; 3591 return IRExpr_Binop( ae->u.Bct.op, 3592 IRExpr_Const(con), 3593 IRExpr_RdTmp(ae->u.Bct.arg2) ); 3594 case Cf64i: 3595 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i)); 3596 case Ittt: 3597 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co), 3598 IRExpr_RdTmp(ae->u.Ittt.e1), 3599 IRExpr_RdTmp(ae->u.Ittt.e0)); 3600 case Ittc: 3601 con0 = LibVEX_Alloc_inline(sizeof(IRConst)); 3602 *con0 = ae->u.Ittc.con0; 3603 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co), 3604 IRExpr_RdTmp(ae->u.Ittc.e1), 3605 IRExpr_Const(con0)); 3606 case Itct: 3607 con1 = LibVEX_Alloc_inline(sizeof(IRConst)); 3608 *con1 = ae->u.Itct.con1; 3609 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co), 3610 IRExpr_Const(con1), 3611 IRExpr_RdTmp(ae->u.Itct.e0)); 3612 3613 case Itcc: 3614 con0 = LibVEX_Alloc_inline(sizeof(IRConst)); 3615 con1 = LibVEX_Alloc_inline(sizeof(IRConst)); 3616 *con0 = ae->u.Itcc.con0; 3617 *con1 = ae->u.Itcc.con1; 3618 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co), 3619 IRExpr_Const(con1), 3620 IRExpr_Const(con0)); 3621 case GetIt: 3622 return IRExpr_GetI(ae->u.GetIt.descr, 3623 IRExpr_RdTmp(ae->u.GetIt.ix), 3624 ae->u.GetIt.bias); 3625 case CCall: { 3626 Int i, n = ae->u.CCall.nArgs; 3627 vassert(n >= 0); 3628 IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*)); 3629 vec[n] = NULL; 3630 for (i = 0; i < n; i++) { 3631 vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]); 3632 } 3633 return IRExpr_CCall(ae->u.CCall.cee, 3634 ae->u.CCall.retty, 3635 vec); 3636 } 3637 case Load: 3638 return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty, 3639 tmpOrConst_to_IRExpr(&ae->u.Load.addr)); 3640 default: 3641 vpanic("availExpr_to_IRExpr"); 3642 } 3643 } 3644 3645 inline 3646 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp ) 3647 { 3648 HWord res; 3649 /* env :: IRTemp -> IRTemp */ 3650 if (lookupHHW( env, &res, (HWord)tmp )) 3651 return (IRTemp)res; 3652 else 3653 return tmp; 3654 } 3655 3656 inline 3657 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc, 3658 HashHW* env ) 3659 { 3660 /* env :: IRTemp -> IRTemp */ 3661 if (tc->tag == TCt) { 3662 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp ); 3663 } 3664 } 3665 3666 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae ) 3667 { 3668 /* env :: IRTemp -> IRTemp */ 3669 switch (ae->tag) { 3670 case Ut: 3671 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg ); 3672 break; 3673 case Btt: 3674 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 ); 3675 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 ); 3676 break; 3677 case Btc: 3678 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 ); 3679 break; 3680 case Bct: 3681 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 ); 3682 break; 3683 case Cf64i: 3684 break; 3685 case Ittt: 3686 ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co ); 3687 ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 ); 3688 ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 ); 3689 break; 3690 case Ittc: 3691 ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co ); 3692 ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 ); 3693 break; 3694 case Itct: 3695 ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co ); 3696 ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 ); 3697 break; 3698 case Itcc: 3699 ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co ); 3700 break; 3701 case GetIt: 3702 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix ); 3703 break; 3704 case CCall: { 3705 Int i, n = ae->u.CCall.nArgs;; 3706 for (i = 0; i < n; i++) { 3707 subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env); 3708 } 3709 break; 3710 } 3711 case Load: 3712 subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env); 3713 break; 3714 default: 3715 vpanic("subst_AvailExpr"); 3716 } 3717 } 3718 3719 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd ) 3720 { 3721 AvailExpr* ae; 3722 3723 switch (e->tag) { 3724 case Iex_Unop: 3725 if (e->Iex.Unop.arg->tag == Iex_RdTmp) { 3726 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3727 ae->tag = Ut; 3728 ae->u.Ut.op = e->Iex.Unop.op; 3729 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp; 3730 return ae; 3731 } 3732 break; 3733 3734 case Iex_Binop: 3735 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) { 3736 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3737 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3738 ae->tag = Btt; 3739 ae->u.Btt.op = e->Iex.Binop.op; 3740 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3741 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3742 return ae; 3743 } 3744 if (e->Iex.Binop.arg2->tag == Iex_Const) { 3745 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3746 ae->tag = Btc; 3747 ae->u.Btc.op = e->Iex.Binop.op; 3748 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 3749 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con); 3750 return ae; 3751 } 3752 } else if (e->Iex.Binop.arg1->tag == Iex_Const 3753 && e->Iex.Binop.arg2->tag == Iex_RdTmp) { 3754 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3755 ae->tag = Bct; 3756 ae->u.Bct.op = e->Iex.Binop.op; 3757 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp; 3758 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con); 3759 return ae; 3760 } 3761 break; 3762 3763 case Iex_Const: 3764 if (e->Iex.Const.con->tag == Ico_F64i) { 3765 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3766 ae->tag = Cf64i; 3767 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i; 3768 return ae; 3769 } 3770 break; 3771 3772 case Iex_ITE: 3773 if (e->Iex.ITE.cond->tag == Iex_RdTmp) { 3774 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) { 3775 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3776 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3777 ae->tag = Ittt; 3778 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3779 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3780 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3781 return ae; 3782 } 3783 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3784 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3785 ae->tag = Itct; 3786 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3787 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3788 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp; 3789 return ae; 3790 } 3791 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) { 3792 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) { 3793 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3794 ae->tag = Ittc; 3795 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3796 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp; 3797 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3798 return ae; 3799 } 3800 if (e->Iex.ITE.iftrue->tag == Iex_Const) { 3801 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3802 ae->tag = Itcc; 3803 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp; 3804 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con); 3805 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con); 3806 return ae; 3807 } 3808 } 3809 } 3810 break; 3811 3812 case Iex_GetI: 3813 if (e->Iex.GetI.ix->tag == Iex_RdTmp) { 3814 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3815 ae->tag = GetIt; 3816 ae->u.GetIt.descr = e->Iex.GetI.descr; 3817 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp; 3818 ae->u.GetIt.bias = e->Iex.GetI.bias; 3819 return ae; 3820 } 3821 break; 3822 3823 case Iex_CCall: 3824 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3825 ae->tag = CCall; 3826 /* Ok to share only the cee, since it is immutable. */ 3827 ae->u.CCall.cee = e->Iex.CCall.cee; 3828 ae->u.CCall.retty = e->Iex.CCall.retty; 3829 /* irExprVec_to_TmpOrConsts will assert if the args are 3830 neither tmps nor constants, but that's ok .. that's all they 3831 should be. */ 3832 irExprVec_to_TmpOrConsts( 3833 &ae->u.CCall.args, &ae->u.CCall.nArgs, 3834 e->Iex.CCall.args 3835 ); 3836 return ae; 3837 3838 case Iex_Load: 3839 /* If the caller of do_cse_BB has requested that loads also 3840 be CSEd, convert them into AvailExprs. If not, we'll just 3841 return NULL here, and the load never becomes considered 3842 "available", which effectively disables CSEing of them, as 3843 desired. */ 3844 if (allowLoadsToBeCSEd) { 3845 ae = LibVEX_Alloc_inline(sizeof(AvailExpr)); 3846 ae->tag = Load; 3847 ae->u.Load.end = e->Iex.Load.end; 3848 ae->u.Load.ty = e->Iex.Load.ty; 3849 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr); 3850 return ae; 3851 } 3852 break; 3853 3854 default: 3855 break; 3856 } 3857 3858 return NULL; 3859 } 3860 3861 3862 /* The BB is modified in-place. Returns True if any changes were 3863 made. The caller can choose whether or not loads should be CSEd. 3864 In the normal course of things we don't do that, since CSEing loads 3865 is something of a dodgy proposition if the guest program is doing 3866 some screwy stuff to do with races and spinloops. */ 3867 3868 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) 3869 { 3870 Int i, j, paranoia; 3871 IRTemp t, q; 3872 IRStmt* st; 3873 AvailExpr* eprime; 3874 AvailExpr* ae; 3875 Bool invalidate; 3876 Bool anyDone = False; 3877 3878 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */ 3879 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */ 3880 3881 vassert(sizeof(IRTemp) <= sizeof(HWord)); 3882 3883 if (0) { ppIRSB(bb); vex_printf("\n\n"); } 3884 3885 /* Iterate forwards over the stmts. 3886 On seeing "t = E", where E is one of the AvailExpr forms: 3887 let E' = apply tenv substitution to E 3888 search aenv for E' 3889 if a mapping E' -> q is found, 3890 replace this stmt by "t = q" 3891 and add binding t -> q to tenv 3892 else 3893 add binding E' -> t to aenv 3894 replace this stmt by "t = E'" 3895 3896 Other statements are only interesting to the extent that they 3897 might invalidate some of the expressions in aenv. So there is 3898 an invalidate-bindings check for each statement seen. 3899 */ 3900 for (i = 0; i < bb->stmts_used; i++) { 3901 st = bb->stmts[i]; 3902 3903 /* ------ BEGIN invalidate aenv bindings ------ */ 3904 /* This is critical: remove from aenv any E' -> .. bindings 3905 which might be invalidated by this statement. The only 3906 vulnerable kind of bindings are the GetI and Load kinds. 3907 Dirty call - dump (paranoia level -> 2) 3908 Store - dump (ditto) 3909 Put, PutI - dump unless no-overlap is proven (.. -> 1) 3910 Uses getAliasingRelation_IC and getAliasingRelation_II 3911 to do the no-overlap assessments needed for Put/PutI. 3912 */ 3913 switch (st->tag) { 3914 case Ist_Dirty: case Ist_Store: case Ist_MBE: 3915 case Ist_CAS: case Ist_LLSC: 3916 case Ist_StoreG: 3917 paranoia = 2; break; 3918 case Ist_Put: case Ist_PutI: 3919 paranoia = 1; break; 3920 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: 3921 case Ist_WrTmp: case Ist_Exit: case Ist_LoadG: 3922 paranoia = 0; break; 3923 default: 3924 vpanic("do_cse_BB(1)"); 3925 } 3926 3927 if (paranoia > 0) { 3928 for (j = 0; j < aenv->used; j++) { 3929 if (!aenv->inuse[j]) 3930 continue; 3931 ae = (AvailExpr*)aenv->key[j]; 3932 if (ae->tag != GetIt && ae->tag != Load) 3933 continue; 3934 invalidate = False; 3935 if (paranoia >= 2) { 3936 invalidate = True; 3937 } else { 3938 vassert(paranoia == 1); 3939 if (ae->tag == Load) { 3940 /* Loads can be invalidated by anything that could 3941 possibly touch memory. But in that case we 3942 should have |paranoia| == 2 and we won't get 3943 here. So there's nothing to do; we don't have to 3944 invalidate the load. */ 3945 } 3946 else 3947 if (st->tag == Ist_Put) { 3948 if (getAliasingRelation_IC( 3949 ae->u.GetIt.descr, 3950 IRExpr_RdTmp(ae->u.GetIt.ix), 3951 st->Ist.Put.offset, 3952 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) 3953 ) != NoAlias) 3954 invalidate = True; 3955 } 3956 else 3957 if (st->tag == Ist_PutI) { 3958 IRPutI *puti = st->Ist.PutI.details; 3959 if (getAliasingRelation_II( 3960 ae->u.GetIt.descr, 3961 IRExpr_RdTmp(ae->u.GetIt.ix), 3962 ae->u.GetIt.bias, 3963 puti->descr, 3964 puti->ix, 3965 puti->bias 3966 ) != NoAlias) 3967 invalidate = True; 3968 } 3969 else 3970 vpanic("do_cse_BB(2)"); 3971 } 3972 3973 if (invalidate) { 3974 aenv->inuse[j] = False; 3975 aenv->key[j] = (HWord)NULL; /* be sure */ 3976 } 3977 } /* for j */ 3978 } /* paranoia > 0 */ 3979 3980 /* ------ ENV invalidate aenv bindings ------ */ 3981 3982 /* ignore not-interestings */ 3983 if (st->tag != Ist_WrTmp) 3984 continue; 3985 3986 t = st->Ist.WrTmp.tmp; 3987 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd); 3988 /* ignore if not of AvailExpr form */ 3989 if (!eprime) 3990 continue; 3991 3992 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */ 3993 3994 /* apply tenv */ 3995 subst_AvailExpr( tenv, eprime ); 3996 3997 /* search aenv for eprime, unfortunately the hard way */ 3998 for (j = 0; j < aenv->used; j++) 3999 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j])) 4000 break; 4001 4002 if (j < aenv->used) { 4003 /* A binding E' -> q was found. Replace stmt by "t = q" and 4004 note the t->q binding in tenv. */ 4005 /* (this is the core of the CSE action) */ 4006 q = (IRTemp)aenv->val[j]; 4007 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) ); 4008 addToHHW( tenv, (HWord)t, (HWord)q ); 4009 anyDone = True; 4010 } else { 4011 /* No binding was found, so instead we add E' -> t to our 4012 collection of available expressions, replace this stmt 4013 with "t = E'", and move on. */ 4014 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) ); 4015 addToHHW( aenv, (HWord)eprime, (HWord)t ); 4016 } 4017 } 4018 4019 /* 4020 ppIRSB(bb); 4021 sanityCheckIRSB(bb, Ity_I32); 4022 vex_printf("\n\n"); 4023 */ 4024 return anyDone; 4025 } 4026 4027 4028 /*---------------------------------------------------------------*/ 4029 /*--- Add32/Sub32 chain collapsing ---*/ 4030 /*---------------------------------------------------------------*/ 4031 4032 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */ 4033 4034 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If 4035 yes, set *tmp and *i32 appropriately. *i32 is set as if the 4036 root node is Add32, not Sub32. */ 4037 4038 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 ) 4039 { 4040 if (e->tag != Iex_Binop) 4041 return False; 4042 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32) 4043 return False; 4044 if (e->Iex.Binop.arg1->tag != Iex_RdTmp) 4045 return False; 4046 if (e->Iex.Binop.arg2->tag != Iex_Const) 4047 return False; 4048 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp; 4049 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32); 4050 if (e->Iex.Binop.op == Iop_Sub32) 4051 *i32 = -*i32; 4052 return True; 4053 } 4054 4055 4056 /* Figure out if tmp can be expressed as tmp2 +32 const, for some 4057 other tmp2. Scan backwards from the specified start point -- an 4058 optimisation. */ 4059 4060 static Bool collapseChain ( IRSB* bb, Int startHere, 4061 IRTemp tmp, 4062 IRTemp* tmp2, Int* i32 ) 4063 { 4064 Int j, ii; 4065 IRTemp vv; 4066 IRStmt* st; 4067 IRExpr* e; 4068 4069 /* the (var, con) pair contain the current 'representation' for 4070 'tmp'. We start with 'tmp + 0'. */ 4071 IRTemp var = tmp; 4072 Int con = 0; 4073 4074 /* Scan backwards to see if tmp can be replaced by some other tmp 4075 +/- a constant. */ 4076 for (j = startHere; j >= 0; j--) { 4077 st = bb->stmts[j]; 4078 if (st->tag != Ist_WrTmp) 4079 continue; 4080 if (st->Ist.WrTmp.tmp != var) 4081 continue; 4082 e = st->Ist.WrTmp.data; 4083 if (!isAdd32OrSub32(e, &vv, &ii)) 4084 break; 4085 var = vv; 4086 con += ii; 4087 } 4088 if (j == -1) 4089 /* no earlier binding for var .. ill-formed IR */ 4090 vpanic("collapseChain"); 4091 4092 /* so, did we find anything interesting? */ 4093 if (var == tmp) 4094 return False; /* no .. */ 4095 4096 *tmp2 = var; 4097 *i32 = con; 4098 return True; 4099 } 4100 4101 4102 /* ------- Main function for Add32/Sub32 chain collapsing ------ */ 4103 4104 static void collapse_AddSub_chains_BB ( IRSB* bb ) 4105 { 4106 IRStmt *st; 4107 IRTemp var, var2; 4108 Int i, con, con2; 4109 4110 for (i = bb->stmts_used-1; i >= 0; i--) { 4111 st = bb->stmts[i]; 4112 if (st->tag == Ist_NoOp) 4113 continue; 4114 4115 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */ 4116 4117 if (st->tag == Ist_WrTmp 4118 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) { 4119 4120 /* So e1 is of the form Add32(var,con) or Sub32(var,-con). 4121 Find out if var can be expressed as var2 + con2. */ 4122 if (collapseChain(bb, i-1, var, &var2, &con2)) { 4123 if (DEBUG_IROPT) { 4124 vex_printf("replacing1 "); 4125 ppIRStmt(st); 4126 vex_printf(" with "); 4127 } 4128 con2 += con; 4129 bb->stmts[i] 4130 = IRStmt_WrTmp( 4131 st->Ist.WrTmp.tmp, 4132 (con2 >= 0) 4133 ? IRExpr_Binop(Iop_Add32, 4134 IRExpr_RdTmp(var2), 4135 IRExpr_Const(IRConst_U32(con2))) 4136 : IRExpr_Binop(Iop_Sub32, 4137 IRExpr_RdTmp(var2), 4138 IRExpr_Const(IRConst_U32(-con2))) 4139 ); 4140 if (DEBUG_IROPT) { 4141 ppIRStmt(bb->stmts[i]); 4142 vex_printf("\n"); 4143 } 4144 } 4145 4146 continue; 4147 } 4148 4149 /* Try to collapse 't1 = GetI[t2, con]'. */ 4150 4151 if (st->tag == Ist_WrTmp 4152 && st->Ist.WrTmp.data->tag == Iex_GetI 4153 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp 4154 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix 4155 ->Iex.RdTmp.tmp, &var2, &con2)) { 4156 if (DEBUG_IROPT) { 4157 vex_printf("replacing3 "); 4158 ppIRStmt(st); 4159 vex_printf(" with "); 4160 } 4161 con2 += st->Ist.WrTmp.data->Iex.GetI.bias; 4162 bb->stmts[i] 4163 = IRStmt_WrTmp( 4164 st->Ist.WrTmp.tmp, 4165 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr, 4166 IRExpr_RdTmp(var2), 4167 con2)); 4168 if (DEBUG_IROPT) { 4169 ppIRStmt(bb->stmts[i]); 4170 vex_printf("\n"); 4171 } 4172 continue; 4173 } 4174 4175 /* Perhaps st is PutI[t, con] ? */ 4176 IRPutI *puti = st->Ist.PutI.details; 4177 if (st->tag == Ist_PutI 4178 && puti->ix->tag == Iex_RdTmp 4179 && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp, 4180 &var2, &con2)) { 4181 if (DEBUG_IROPT) { 4182 vex_printf("replacing2 "); 4183 ppIRStmt(st); 4184 vex_printf(" with "); 4185 } 4186 con2 += puti->bias; 4187 bb->stmts[i] 4188 = IRStmt_PutI(mkIRPutI(puti->descr, 4189 IRExpr_RdTmp(var2), 4190 con2, 4191 puti->data)); 4192 if (DEBUG_IROPT) { 4193 ppIRStmt(bb->stmts[i]); 4194 vex_printf("\n"); 4195 } 4196 continue; 4197 } 4198 4199 } /* for */ 4200 } 4201 4202 4203 /*---------------------------------------------------------------*/ 4204 /*--- PutI/GetI transformations ---*/ 4205 /*---------------------------------------------------------------*/ 4206 4207 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from 4208 the given starting point to find, if any, a PutI which writes 4209 exactly the same piece of guest state, and so return the expression 4210 that the PutI writes. This is the core of PutI-GetI forwarding. */ 4211 4212 static 4213 IRExpr* findPutI ( IRSB* bb, Int startHere, 4214 IRRegArray* descrG, IRExpr* ixG, Int biasG ) 4215 { 4216 Int j; 4217 IRStmt* st; 4218 GSAliasing relation; 4219 4220 if (0) { 4221 vex_printf("\nfindPutI "); 4222 ppIRRegArray(descrG); 4223 vex_printf(" "); 4224 ppIRExpr(ixG); 4225 vex_printf(" %d\n", biasG); 4226 } 4227 4228 /* Scan backwards in bb from startHere to find a suitable PutI 4229 binding for (descrG, ixG, biasG), if any. */ 4230 4231 for (j = startHere; j >= 0; j--) { 4232 st = bb->stmts[j]; 4233 if (st->tag == Ist_NoOp) 4234 continue; 4235 4236 if (st->tag == Ist_Put) { 4237 /* Non-indexed Put. This can't give a binding, but we do 4238 need to check it doesn't invalidate the search by 4239 overlapping any part of the indexed guest state. */ 4240 4241 relation 4242 = getAliasingRelation_IC( 4243 descrG, ixG, 4244 st->Ist.Put.offset, 4245 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); 4246 4247 if (relation == NoAlias) { 4248 /* we're OK; keep going */ 4249 continue; 4250 } else { 4251 /* relation == UnknownAlias || relation == ExactAlias */ 4252 /* If this assertion fails, we've found a Put which writes 4253 an area of guest state which is read by a GetI. Which 4254 is unlikely (although not per se wrong). */ 4255 vassert(relation != ExactAlias); 4256 /* This Put potentially writes guest state that the GetI 4257 reads; we must fail. */ 4258 return NULL; 4259 } 4260 } 4261 4262 if (st->tag == Ist_PutI) { 4263 IRPutI *puti = st->Ist.PutI.details; 4264 4265 relation = getAliasingRelation_II( 4266 descrG, ixG, biasG, 4267 puti->descr, 4268 puti->ix, 4269 puti->bias 4270 ); 4271 4272 if (relation == NoAlias) { 4273 /* This PutI definitely doesn't overlap. Ignore it and 4274 keep going. */ 4275 continue; /* the for j loop */ 4276 } 4277 4278 if (relation == UnknownAlias) { 4279 /* We don't know if this PutI writes to the same guest 4280 state that the GetI, or not. So we have to give up. */ 4281 return NULL; 4282 } 4283 4284 /* Otherwise, we've found what we're looking for. */ 4285 vassert(relation == ExactAlias); 4286 return puti->data; 4287 4288 } /* if (st->tag == Ist_PutI) */ 4289 4290 if (st->tag == Ist_Dirty) { 4291 /* Be conservative. If the dirty call has any guest effects at 4292 all, give up. We could do better -- only give up if there 4293 are any guest writes/modifies. */ 4294 if (st->Ist.Dirty.details->nFxState > 0) 4295 return NULL; 4296 } 4297 4298 } /* for */ 4299 4300 /* No valid replacement was found. */ 4301 return NULL; 4302 } 4303 4304 4305 4306 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense 4307 that it writes exactly the same piece of guest state) ? Safe 4308 answer: False. */ 4309 4310 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 ) 4311 { 4312 vassert(pi->tag == Ist_PutI); 4313 if (s2->tag != Ist_PutI) 4314 return False; 4315 4316 IRPutI *p1 = pi->Ist.PutI.details; 4317 IRPutI *p2 = s2->Ist.PutI.details; 4318 4319 return toBool( 4320 getAliasingRelation_II( 4321 p1->descr, p1->ix, p1->bias, 4322 p2->descr, p2->ix, p2->bias 4323 ) 4324 == ExactAlias 4325 ); 4326 } 4327 4328 4329 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might 4330 overlap it? Safe answer: True. Note, we could do a lot better 4331 than this if needed. */ 4332 4333 static 4334 Bool guestAccessWhichMightOverlapPutI ( 4335 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2 4336 ) 4337 { 4338 GSAliasing relation; 4339 UInt minoffP, maxoffP; 4340 4341 vassert(pi->tag == Ist_PutI); 4342 4343 IRPutI *p1 = pi->Ist.PutI.details; 4344 4345 getArrayBounds(p1->descr, &minoffP, &maxoffP); 4346 switch (s2->tag) { 4347 4348 case Ist_NoOp: 4349 case Ist_IMark: 4350 return False; 4351 4352 case Ist_MBE: 4353 case Ist_AbiHint: 4354 /* just be paranoid ... these should be rare. */ 4355 return True; 4356 4357 case Ist_CAS: 4358 /* This is unbelievably lame, but it's probably not 4359 significant from a performance point of view. Really, a 4360 CAS is a load-store op, so it should be safe to say False. 4361 However .. */ 4362 return True; 4363 4364 case Ist_Dirty: 4365 /* If the dirty call has any guest effects at all, give up. 4366 Probably could do better. */ 4367 if (s2->Ist.Dirty.details->nFxState > 0) 4368 return True; 4369 return False; 4370 4371 case Ist_Put: 4372 vassert(isIRAtom(s2->Ist.Put.data)); 4373 relation 4374 = getAliasingRelation_IC( 4375 p1->descr, p1->ix, 4376 s2->Ist.Put.offset, 4377 typeOfIRExpr(tyenv,s2->Ist.Put.data) 4378 ); 4379 goto have_relation; 4380 4381 case Ist_PutI: { 4382 IRPutI *p2 = s2->Ist.PutI.details; 4383 4384 vassert(isIRAtom(p2->ix)); 4385 vassert(isIRAtom(p2->data)); 4386 relation 4387 = getAliasingRelation_II( 4388 p1->descr, p1->ix, p1->bias, 4389 p2->descr, p2->ix, p2->bias 4390 ); 4391 goto have_relation; 4392 } 4393 4394 case Ist_WrTmp: 4395 if (s2->Ist.WrTmp.data->tag == Iex_GetI) { 4396 relation 4397 = getAliasingRelation_II( 4398 p1->descr, p1->ix, p1->bias, 4399 s2->Ist.WrTmp.data->Iex.GetI.descr, 4400 s2->Ist.WrTmp.data->Iex.GetI.ix, 4401 s2->Ist.WrTmp.data->Iex.GetI.bias 4402 ); 4403 goto have_relation; 4404 } 4405 if (s2->Ist.WrTmp.data->tag == Iex_Get) { 4406 relation 4407 = getAliasingRelation_IC( 4408 p1->descr, p1->ix, 4409 s2->Ist.WrTmp.data->Iex.Get.offset, 4410 s2->Ist.WrTmp.data->Iex.Get.ty 4411 ); 4412 goto have_relation; 4413 } 4414 return False; 4415 4416 case Ist_Store: 4417 vassert(isIRAtom(s2->Ist.Store.addr)); 4418 vassert(isIRAtom(s2->Ist.Store.data)); 4419 return False; 4420 4421 default: 4422 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n"); 4423 vpanic("guestAccessWhichMightOverlapPutI"); 4424 } 4425 4426 have_relation: 4427 if (relation == NoAlias) 4428 return False; 4429 else 4430 return True; /* ExactAlias or UnknownAlias */ 4431 } 4432 4433 4434 4435 /* ---------- PutI/GetI transformations main functions --------- */ 4436 4437 /* Remove redundant GetIs, to the extent that they can be detected. 4438 bb is modified in-place. */ 4439 4440 static 4441 void do_redundant_GetI_elimination ( IRSB* bb ) 4442 { 4443 Int i; 4444 IRStmt* st; 4445 4446 for (i = bb->stmts_used-1; i >= 0; i--) { 4447 st = bb->stmts[i]; 4448 if (st->tag == Ist_NoOp) 4449 continue; 4450 4451 if (st->tag == Ist_WrTmp 4452 && st->Ist.WrTmp.data->tag == Iex_GetI 4453 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) { 4454 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr; 4455 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix; 4456 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias; 4457 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias); 4458 if (replacement 4459 && isIRAtom(replacement) 4460 /* Make sure we're doing a type-safe transformation! */ 4461 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) { 4462 if (DEBUG_IROPT) { 4463 vex_printf("rGI: "); 4464 ppIRExpr(st->Ist.WrTmp.data); 4465 vex_printf(" -> "); 4466 ppIRExpr(replacement); 4467 vex_printf("\n"); 4468 } 4469 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement); 4470 } 4471 } 4472 } 4473 4474 } 4475 4476 4477 /* Remove redundant PutIs, to the extent which they can be detected. 4478 bb is modified in-place. */ 4479 4480 static 4481 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl ) 4482 { 4483 Int i, j; 4484 Bool delete; 4485 IRStmt *st, *stj; 4486 4487 vassert(pxControl < VexRegUpdAllregsAtEachInsn); 4488 4489 for (i = 0; i < bb->stmts_used; i++) { 4490 st = bb->stmts[i]; 4491 if (st->tag != Ist_PutI) 4492 continue; 4493 /* Ok, search forwards from here to see if we can find another 4494 PutI which makes this one redundant, and dodging various 4495 hazards. Search forwards: 4496 * If conditional exit, give up (because anything after that 4497 does not postdominate this put). 4498 * If a Get which might overlap, give up (because this PutI 4499 not necessarily dead). 4500 * If a Put which is identical, stop with success. 4501 * If a Put which might overlap, but is not identical, give up. 4502 * If a dirty helper call which might write guest state, give up. 4503 * If a Put which definitely doesn't overlap, or any other 4504 kind of stmt, continue. 4505 */ 4506 delete = False; 4507 for (j = i+1; j < bb->stmts_used; j++) { 4508 stj = bb->stmts[j]; 4509 if (stj->tag == Ist_NoOp) 4510 continue; 4511 if (identicalPutIs(st, stj)) { 4512 /* success! */ 4513 delete = True; 4514 break; 4515 } 4516 if (stj->tag == Ist_Exit) 4517 /* give up */ 4518 break; 4519 if (st->tag == Ist_Dirty) 4520 /* give up; could do better here */ 4521 break; 4522 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj)) 4523 /* give up */ 4524 break; 4525 } 4526 4527 if (delete) { 4528 if (DEBUG_IROPT) { 4529 vex_printf("rPI: "); 4530 ppIRStmt(st); 4531 vex_printf("\n"); 4532 } 4533 bb->stmts[i] = IRStmt_NoOp(); 4534 } 4535 4536 } 4537 } 4538 4539 4540 /*---------------------------------------------------------------*/ 4541 /*--- Loop unrolling ---*/ 4542 /*---------------------------------------------------------------*/ 4543 4544 /* Adjust all tmp values (names) in e by delta. e is destructively 4545 modified. */ 4546 4547 static void deltaIRExpr ( IRExpr* e, Int delta ) 4548 { 4549 Int i; 4550 switch (e->tag) { 4551 case Iex_RdTmp: 4552 e->Iex.RdTmp.tmp += delta; 4553 break; 4554 case Iex_Get: 4555 case Iex_Const: 4556 break; 4557 case Iex_GetI: 4558 deltaIRExpr(e->Iex.GetI.ix, delta); 4559 break; 4560 case Iex_Qop: 4561 deltaIRExpr(e->Iex.Qop.details->arg1, delta); 4562 deltaIRExpr(e->Iex.Qop.details->arg2, delta); 4563 deltaIRExpr(e->Iex.Qop.details->arg3, delta); 4564 deltaIRExpr(e->Iex.Qop.details->arg4, delta); 4565 break; 4566 case Iex_Triop: 4567 deltaIRExpr(e->Iex.Triop.details->arg1, delta); 4568 deltaIRExpr(e->Iex.Triop.details->arg2, delta); 4569 deltaIRExpr(e->Iex.Triop.details->arg3, delta); 4570 break; 4571 case Iex_Binop: 4572 deltaIRExpr(e->Iex.Binop.arg1, delta); 4573 deltaIRExpr(e->Iex.Binop.arg2, delta); 4574 break; 4575 case Iex_Unop: 4576 deltaIRExpr(e->Iex.Unop.arg, delta); 4577 break; 4578 case Iex_Load: 4579 deltaIRExpr(e->Iex.Load.addr, delta); 4580 break; 4581 case Iex_CCall: 4582 for (i = 0; e->Iex.CCall.args[i]; i++) 4583 deltaIRExpr(e->Iex.CCall.args[i], delta); 4584 break; 4585 case Iex_ITE: 4586 deltaIRExpr(e->Iex.ITE.cond, delta); 4587 deltaIRExpr(e->Iex.ITE.iftrue, delta); 4588 deltaIRExpr(e->Iex.ITE.iffalse, delta); 4589 break; 4590 default: 4591 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 4592 vpanic("deltaIRExpr"); 4593 } 4594 } 4595 4596 /* Adjust all tmp values (names) in st by delta. st is destructively 4597 modified. */ 4598 4599 static void deltaIRStmt ( IRStmt* st, Int delta ) 4600 { 4601 Int i; 4602 IRDirty* d; 4603 switch (st->tag) { 4604 case Ist_NoOp: 4605 case Ist_IMark: 4606 case Ist_MBE: 4607 break; 4608 case Ist_AbiHint: 4609 deltaIRExpr(st->Ist.AbiHint.base, delta); 4610 deltaIRExpr(st->Ist.AbiHint.nia, delta); 4611 break; 4612 case Ist_Put: 4613 deltaIRExpr(st->Ist.Put.data, delta); 4614 break; 4615 case Ist_PutI: 4616 deltaIRExpr(st->Ist.PutI.details->ix, delta); 4617 deltaIRExpr(st->Ist.PutI.details->data, delta); 4618 break; 4619 case Ist_WrTmp: 4620 st->Ist.WrTmp.tmp += delta; 4621 deltaIRExpr(st->Ist.WrTmp.data, delta); 4622 break; 4623 case Ist_Exit: 4624 deltaIRExpr(st->Ist.Exit.guard, delta); 4625 break; 4626 case Ist_Store: 4627 deltaIRExpr(st->Ist.Store.addr, delta); 4628 deltaIRExpr(st->Ist.Store.data, delta); 4629 break; 4630 case Ist_StoreG: { 4631 IRStoreG* sg = st->Ist.StoreG.details; 4632 deltaIRExpr(sg->addr, delta); 4633 deltaIRExpr(sg->data, delta); 4634 deltaIRExpr(sg->guard, delta); 4635 break; 4636 } 4637 case Ist_LoadG: { 4638 IRLoadG* lg = st->Ist.LoadG.details; 4639 lg->dst += delta; 4640 deltaIRExpr(lg->addr, delta); 4641 deltaIRExpr(lg->alt, delta); 4642 deltaIRExpr(lg->guard, delta); 4643 break; 4644 } 4645 case Ist_CAS: 4646 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID) 4647 st->Ist.CAS.details->oldHi += delta; 4648 st->Ist.CAS.details->oldLo += delta; 4649 deltaIRExpr(st->Ist.CAS.details->addr, delta); 4650 if (st->Ist.CAS.details->expdHi) 4651 deltaIRExpr(st->Ist.CAS.details->expdHi, delta); 4652 deltaIRExpr(st->Ist.CAS.details->expdLo, delta); 4653 if (st->Ist.CAS.details->dataHi) 4654 deltaIRExpr(st->Ist.CAS.details->dataHi, delta); 4655 deltaIRExpr(st->Ist.CAS.details->dataLo, delta); 4656 break; 4657 case Ist_LLSC: 4658 st->Ist.LLSC.result += delta; 4659 deltaIRExpr(st->Ist.LLSC.addr, delta); 4660 if (st->Ist.LLSC.storedata) 4661 deltaIRExpr(st->Ist.LLSC.storedata, delta); 4662 break; 4663 case Ist_Dirty: 4664 d = st->Ist.Dirty.details; 4665 deltaIRExpr(d->guard, delta); 4666 for (i = 0; d->args[i]; i++) { 4667 IRExpr* arg = d->args[i]; 4668 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 4669 deltaIRExpr(arg, delta); 4670 } 4671 if (d->tmp != IRTemp_INVALID) 4672 d->tmp += delta; 4673 if (d->mAddr) 4674 deltaIRExpr(d->mAddr, delta); 4675 break; 4676 default: 4677 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 4678 vpanic("deltaIRStmt"); 4679 } 4680 } 4681 4682 4683 /* If possible, return a loop-unrolled version of bb0. The original 4684 is changed. If not possible, return NULL. */ 4685 4686 /* The two schemas considered are: 4687 4688 X: BODY; goto X 4689 4690 which unrolls to (eg) X: BODY;BODY; goto X 4691 4692 and 4693 4694 X: BODY; if (c) goto X; goto Y 4695 which trivially transforms to 4696 X: BODY; if (!c) goto Y; goto X; 4697 so it falls in the scope of the first case. 4698 4699 X and Y must be literal (guest) addresses. 4700 */ 4701 4702 static Int calc_unroll_factor( IRSB* bb ) 4703 { 4704 Int n_stmts, i; 4705 4706 n_stmts = 0; 4707 for (i = 0; i < bb->stmts_used; i++) { 4708 if (bb->stmts[i]->tag != Ist_NoOp) 4709 n_stmts++; 4710 } 4711 4712 if (n_stmts <= vex_control.iropt_unroll_thresh/8) { 4713 if (vex_control.iropt_verbosity > 0) 4714 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n", 4715 n_stmts, 8* n_stmts); 4716 return 8; 4717 } 4718 if (n_stmts <= vex_control.iropt_unroll_thresh/4) { 4719 if (vex_control.iropt_verbosity > 0) 4720 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n", 4721 n_stmts, 4* n_stmts); 4722 return 4; 4723 } 4724 4725 if (n_stmts <= vex_control.iropt_unroll_thresh/2) { 4726 if (vex_control.iropt_verbosity > 0) 4727 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n", 4728 n_stmts, 2* n_stmts); 4729 return 2; 4730 } 4731 4732 if (vex_control.iropt_verbosity > 0) 4733 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts); 4734 4735 return 1; 4736 } 4737 4738 4739 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr ) 4740 { 4741 Int i, j, jmax, n_vars; 4742 Bool xxx_known; 4743 Addr64 xxx_value, yyy_value; 4744 IRExpr* udst; 4745 IRStmt* st; 4746 IRConst* con; 4747 IRSB *bb1, *bb2; 4748 Int unroll_factor; 4749 4750 if (vex_control.iropt_unroll_thresh <= 0) 4751 return NULL; 4752 4753 /* First off, figure out if we can unroll this loop. Do this 4754 without modifying bb0. */ 4755 4756 if (bb0->jumpkind != Ijk_Boring) 4757 return NULL; 4758 4759 xxx_known = False; 4760 xxx_value = 0; 4761 4762 /* Extract the next-guest address. If it isn't a literal, we 4763 have to give up. */ 4764 4765 udst = bb0->next; 4766 if (udst->tag == Iex_Const 4767 && (udst->Iex.Const.con->tag == Ico_U32 4768 || udst->Iex.Const.con->tag == Ico_U64)) { 4769 /* The BB ends in a jump to a literal location. */ 4770 xxx_known = True; 4771 xxx_value = udst->Iex.Const.con->tag == Ico_U64 4772 ? udst->Iex.Const.con->Ico.U64 4773 : (Addr64)(udst->Iex.Const.con->Ico.U32); 4774 } 4775 4776 if (!xxx_known) 4777 return NULL; 4778 4779 /* Now we know the BB ends to a jump to a literal location. If 4780 it's a jump to itself (viz, idiom #1), move directly to the 4781 unrolling stage, first cloning the bb so the original isn't 4782 modified. */ 4783 if (xxx_value == my_addr) { 4784 unroll_factor = calc_unroll_factor( bb0 ); 4785 if (unroll_factor < 2) 4786 return NULL; 4787 bb1 = deepCopyIRSB( bb0 ); 4788 bb0 = NULL; 4789 udst = NULL; /* is now invalid */ 4790 goto do_unroll; 4791 } 4792 4793 /* Search for the second idiomatic form: 4794 X: BODY; if (c) goto X; goto Y 4795 We know Y, but need to establish that the last stmt 4796 is 'if (c) goto X'. 4797 */ 4798 yyy_value = xxx_value; 4799 for (i = bb0->stmts_used-1; i >= 0; i--) 4800 if (bb0->stmts[i]) 4801 break; 4802 4803 if (i < 0) 4804 return NULL; /* block with no stmts. Strange. */ 4805 4806 st = bb0->stmts[i]; 4807 if (st->tag != Ist_Exit) 4808 return NULL; 4809 if (st->Ist.Exit.jk != Ijk_Boring) 4810 return NULL; 4811 4812 con = st->Ist.Exit.dst; 4813 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 4814 4815 xxx_value = con->tag == Ico_U64 4816 ? st->Ist.Exit.dst->Ico.U64 4817 : (Addr64)(st->Ist.Exit.dst->Ico.U32); 4818 4819 /* If this assertion fails, we have some kind of type error. */ 4820 vassert(con->tag == udst->Iex.Const.con->tag); 4821 4822 if (xxx_value != my_addr) 4823 /* We didn't find either idiom. Give up. */ 4824 return NULL; 4825 4826 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and 4827 yyy values (which makes it look like idiom #1), and go into 4828 unrolling proper. This means finding (again) the last stmt, in 4829 the copied BB. */ 4830 4831 unroll_factor = calc_unroll_factor( bb0 ); 4832 if (unroll_factor < 2) 4833 return NULL; 4834 4835 bb1 = deepCopyIRSB( bb0 ); 4836 bb0 = NULL; 4837 udst = NULL; /* is now invalid */ 4838 for (i = bb1->stmts_used-1; i >= 0; i--) 4839 if (bb1->stmts[i]) 4840 break; 4841 4842 /* The next bunch of assertions should be true since we already 4843 found and checked the last stmt in the original bb. */ 4844 4845 vassert(i >= 0); 4846 4847 st = bb1->stmts[i]; 4848 vassert(st->tag == Ist_Exit); 4849 4850 con = st->Ist.Exit.dst; 4851 vassert(con->tag == Ico_U32 || con->tag == Ico_U64); 4852 4853 udst = bb1->next; 4854 vassert(udst->tag == Iex_Const); 4855 vassert(udst->Iex.Const.con->tag == Ico_U32 4856 || udst->Iex.Const.con->tag == Ico_U64); 4857 vassert(con->tag == udst->Iex.Const.con->tag); 4858 4859 /* switch the xxx and yyy fields around */ 4860 if (con->tag == Ico_U64) { 4861 udst->Iex.Const.con->Ico.U64 = xxx_value; 4862 con->Ico.U64 = yyy_value; 4863 } else { 4864 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value; 4865 con->Ico.U32 = (UInt)yyy_value; 4866 } 4867 4868 /* negate the test condition */ 4869 st->Ist.Exit.guard 4870 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard)); 4871 4872 /* --- The unroller proper. Both idioms are by now --- */ 4873 /* --- now converted to idiom 1. --- */ 4874 4875 do_unroll: 4876 4877 vassert(unroll_factor == 2 4878 || unroll_factor == 4 4879 || unroll_factor == 8); 4880 4881 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1); 4882 for (j = 1; j <= jmax; j++) { 4883 4884 n_vars = bb1->tyenv->types_used; 4885 4886 bb2 = deepCopyIRSB(bb1); 4887 for (i = 0; i < n_vars; i++) 4888 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]); 4889 4890 for (i = 0; i < bb2->stmts_used; i++) { 4891 /* deltaIRStmt destructively modifies the stmt, but 4892 that's OK since bb2 is a complete fresh copy of bb1. */ 4893 deltaIRStmt(bb2->stmts[i], n_vars); 4894 addStmtToIRSB(bb1, bb2->stmts[i]); 4895 } 4896 } 4897 4898 if (DEBUG_IROPT) { 4899 vex_printf("\nUNROLLED (%lx)\n", my_addr); 4900 ppIRSB(bb1); 4901 vex_printf("\n"); 4902 } 4903 4904 /* Flattening; sigh. The unroller succeeds in breaking flatness 4905 by negating the test condition. This should be fixed properly. 4906 For the moment use this shotgun approach. */ 4907 return flatten_BB(bb1); 4908 } 4909 4910 4911 /*---------------------------------------------------------------*/ 4912 /*--- The tree builder ---*/ 4913 /*---------------------------------------------------------------*/ 4914 4915 /* This isn't part of IR optimisation. Really it's a pass done prior 4916 to instruction selection, which improves the code that the 4917 instruction selector can produce. */ 4918 4919 /* --- The 'tmp' environment is the central data structure here --- */ 4920 4921 /* The number of outstanding bindings we're prepared to track. 4922 The number of times the env becomes full and we have to dump 4923 the oldest binding (hence reducing code quality) falls very 4924 rapidly as the env size increases. 8 gives reasonable performance 4925 under most circumstances. */ 4926 #define A_NENV 10 4927 4928 /* An interval. Used to record the bytes in the guest state accessed 4929 by a Put[I] statement or by (one or more) Get[I] expression(s). In 4930 case of several Get[I] expressions, the lower/upper bounds are recorded. 4931 This is conservative but cheap. 4932 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107]. 4933 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at 4934 offset 200 would be recorded as [100,203] */ 4935 typedef 4936 struct { 4937 Bool present; 4938 Int low; 4939 Int high; 4940 } 4941 Interval; 4942 4943 static inline Bool 4944 intervals_overlap(Interval i1, Interval i2) 4945 { 4946 return (i1.low >= i2.low && i1.low <= i2.high) || 4947 (i2.low >= i1.low && i2.low <= i1.high); 4948 } 4949 4950 static inline void 4951 update_interval(Interval *i, Int low, Int high) 4952 { 4953 vassert(low <= high); 4954 4955 if (i->present) { 4956 if (low < i->low) i->low = low; 4957 if (high > i->high) i->high = high; 4958 } else { 4959 i->present = True; 4960 i->low = low; 4961 i->high = high; 4962 } 4963 } 4964 4965 4966 /* bindee == NULL === slot is not in use 4967 bindee != NULL === slot is in use 4968 */ 4969 typedef 4970 struct { 4971 IRTemp binder; 4972 IRExpr* bindee; 4973 Bool doesLoad; 4974 /* Record the bytes of the guest state BINDEE reads from. */ 4975 Interval getInterval; 4976 } 4977 ATmpInfo; 4978 4979 __attribute__((unused)) 4980 static void ppAEnv ( ATmpInfo* env ) 4981 { 4982 Int i; 4983 for (i = 0; i < A_NENV; i++) { 4984 vex_printf("%d tmp %d val ", i, (Int)env[i].binder); 4985 if (env[i].bindee) 4986 ppIRExpr(env[i].bindee); 4987 else 4988 vex_printf("(null)"); 4989 vex_printf("\n"); 4990 } 4991 } 4992 4993 /* --- Tree-traversal fns --- */ 4994 4995 /* Traverse an expr, and detect if any part of it reads memory or does 4996 a Get. Be careful ... this really controls how much the 4997 tree-builder can reorder the code, so getting it right is critical. 4998 */ 4999 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e ) 5000 { 5001 Int i; 5002 switch (e->tag) { 5003 case Iex_CCall: 5004 for (i = 0; e->Iex.CCall.args[i]; i++) 5005 setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]); 5006 return; 5007 case Iex_ITE: 5008 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond); 5009 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue); 5010 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse); 5011 return; 5012 case Iex_Qop: 5013 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1); 5014 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2); 5015 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3); 5016 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4); 5017 return; 5018 case Iex_Triop: 5019 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1); 5020 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2); 5021 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3); 5022 return; 5023 case Iex_Binop: 5024 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1); 5025 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2); 5026 return; 5027 case Iex_Unop: 5028 setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg); 5029 return; 5030 case Iex_Load: 5031 *doesLoad = True; 5032 setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr); 5033 return; 5034 case Iex_Get: { 5035 Int low = e->Iex.Get.offset; 5036 Int high = low + sizeofIRType(e->Iex.Get.ty) - 1; 5037 update_interval(getInterval, low, high); 5038 return; 5039 } 5040 case Iex_GetI: { 5041 IRRegArray *descr = e->Iex.GetI.descr; 5042 Int size = sizeofIRType(descr->elemTy); 5043 Int low = descr->base; 5044 Int high = low + descr->nElems * size - 1; 5045 update_interval(getInterval, low, high); 5046 setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix); 5047 return; 5048 } 5049 case Iex_RdTmp: 5050 case Iex_Const: 5051 return; 5052 default: 5053 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5054 vpanic("setHints_Expr"); 5055 } 5056 } 5057 5058 5059 /* Add a binding to the front of the env and slide all the rest 5060 backwards. It should be the case that the last slot is free. */ 5061 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee ) 5062 { 5063 Int i; 5064 vassert(env[A_NENV-1].bindee == NULL); 5065 for (i = A_NENV-1; i >= 1; i--) 5066 env[i] = env[i-1]; 5067 env[0].binder = binder; 5068 env[0].bindee = bindee; 5069 env[0].doesLoad = False; /* filled in later */ 5070 env[0].getInterval.present = False; /* filled in later */ 5071 env[0].getInterval.low = -1; /* filled in later */ 5072 env[0].getInterval.high = -1; /* filled in later */ 5073 } 5074 5075 /* Given uses :: array of UShort, indexed by IRTemp 5076 Add the use-occurrences of temps in this expression 5077 to the env. 5078 */ 5079 static void aoccCount_Expr ( UShort* uses, IRExpr* e ) 5080 { 5081 Int i; 5082 5083 switch (e->tag) { 5084 5085 case Iex_RdTmp: /* the only interesting case */ 5086 uses[e->Iex.RdTmp.tmp]++; 5087 return; 5088 5089 case Iex_ITE: 5090 aoccCount_Expr(uses, e->Iex.ITE.cond); 5091 aoccCount_Expr(uses, e->Iex.ITE.iftrue); 5092 aoccCount_Expr(uses, e->Iex.ITE.iffalse); 5093 return; 5094 5095 case Iex_Qop: 5096 aoccCount_Expr(uses, e->Iex.Qop.details->arg1); 5097 aoccCount_Expr(uses, e->Iex.Qop.details->arg2); 5098 aoccCount_Expr(uses, e->Iex.Qop.details->arg3); 5099 aoccCount_Expr(uses, e->Iex.Qop.details->arg4); 5100 return; 5101 5102 case Iex_Triop: 5103 aoccCount_Expr(uses, e->Iex.Triop.details->arg1); 5104 aoccCount_Expr(uses, e->Iex.Triop.details->arg2); 5105 aoccCount_Expr(uses, e->Iex.Triop.details->arg3); 5106 return; 5107 5108 case Iex_Binop: 5109 aoccCount_Expr(uses, e->Iex.Binop.arg1); 5110 aoccCount_Expr(uses, e->Iex.Binop.arg2); 5111 return; 5112 5113 case Iex_Unop: 5114 aoccCount_Expr(uses, e->Iex.Unop.arg); 5115 return; 5116 5117 case Iex_Load: 5118 aoccCount_Expr(uses, e->Iex.Load.addr); 5119 return; 5120 5121 case Iex_CCall: 5122 for (i = 0; e->Iex.CCall.args[i]; i++) 5123 aoccCount_Expr(uses, e->Iex.CCall.args[i]); 5124 return; 5125 5126 case Iex_GetI: 5127 aoccCount_Expr(uses, e->Iex.GetI.ix); 5128 return; 5129 5130 case Iex_Const: 5131 case Iex_Get: 5132 return; 5133 5134 default: 5135 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5136 vpanic("aoccCount_Expr"); 5137 } 5138 } 5139 5140 5141 /* Given uses :: array of UShort, indexed by IRTemp 5142 Add the use-occurrences of temps in this statement 5143 to the env. 5144 */ 5145 static void aoccCount_Stmt ( UShort* uses, IRStmt* st ) 5146 { 5147 Int i; 5148 IRDirty* d; 5149 IRCAS* cas; 5150 switch (st->tag) { 5151 case Ist_AbiHint: 5152 aoccCount_Expr(uses, st->Ist.AbiHint.base); 5153 aoccCount_Expr(uses, st->Ist.AbiHint.nia); 5154 return; 5155 case Ist_WrTmp: 5156 aoccCount_Expr(uses, st->Ist.WrTmp.data); 5157 return; 5158 case Ist_Put: 5159 aoccCount_Expr(uses, st->Ist.Put.data); 5160 return; 5161 case Ist_PutI: 5162 aoccCount_Expr(uses, st->Ist.PutI.details->ix); 5163 aoccCount_Expr(uses, st->Ist.PutI.details->data); 5164 return; 5165 case Ist_Store: 5166 aoccCount_Expr(uses, st->Ist.Store.addr); 5167 aoccCount_Expr(uses, st->Ist.Store.data); 5168 return; 5169 case Ist_StoreG: { 5170 IRStoreG* sg = st->Ist.StoreG.details; 5171 aoccCount_Expr(uses, sg->addr); 5172 aoccCount_Expr(uses, sg->data); 5173 aoccCount_Expr(uses, sg->guard); 5174 return; 5175 } 5176 case Ist_LoadG: { 5177 IRLoadG* lg = st->Ist.LoadG.details; 5178 aoccCount_Expr(uses, lg->addr); 5179 aoccCount_Expr(uses, lg->alt); 5180 aoccCount_Expr(uses, lg->guard); 5181 return; 5182 } 5183 case Ist_CAS: 5184 cas = st->Ist.CAS.details; 5185 aoccCount_Expr(uses, cas->addr); 5186 if (cas->expdHi) 5187 aoccCount_Expr(uses, cas->expdHi); 5188 aoccCount_Expr(uses, cas->expdLo); 5189 if (cas->dataHi) 5190 aoccCount_Expr(uses, cas->dataHi); 5191 aoccCount_Expr(uses, cas->dataLo); 5192 return; 5193 case Ist_LLSC: 5194 aoccCount_Expr(uses, st->Ist.LLSC.addr); 5195 if (st->Ist.LLSC.storedata) 5196 aoccCount_Expr(uses, st->Ist.LLSC.storedata); 5197 return; 5198 case Ist_Dirty: 5199 d = st->Ist.Dirty.details; 5200 if (d->mFx != Ifx_None) 5201 aoccCount_Expr(uses, d->mAddr); 5202 aoccCount_Expr(uses, d->guard); 5203 for (i = 0; d->args[i]; i++) { 5204 IRExpr* arg = d->args[i]; 5205 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 5206 aoccCount_Expr(uses, arg); 5207 } 5208 return; 5209 case Ist_NoOp: 5210 case Ist_IMark: 5211 case Ist_MBE: 5212 return; 5213 case Ist_Exit: 5214 aoccCount_Expr(uses, st->Ist.Exit.guard); 5215 return; 5216 default: 5217 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 5218 vpanic("aoccCount_Stmt"); 5219 } 5220 } 5221 5222 /* Look up a binding for tmp in the env. If found, return the bound 5223 expression, and set the env's binding to NULL so it is marked as 5224 used. If not found, return NULL. */ 5225 5226 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp ) 5227 { 5228 Int i; 5229 for (i = 0; i < A_NENV; i++) { 5230 if (env[i].binder == tmp && env[i].bindee != NULL) { 5231 IRExpr* bindee = env[i].bindee; 5232 env[i].bindee = NULL; 5233 return bindee; 5234 } 5235 } 5236 return NULL; 5237 } 5238 5239 /* Traverse e, looking for temps. For each observed temp, see if env 5240 contains a binding for the temp, and if so return the bound value. 5241 The env has the property that any binding it holds is 5242 'single-shot', so once a binding is used, it is marked as no longer 5243 available, by setting its .bindee field to NULL. */ 5244 5245 static inline Bool is_Unop ( IRExpr* e, IROp op ) { 5246 return e->tag == Iex_Unop && e->Iex.Unop.op == op; 5247 } 5248 static inline Bool is_Binop ( IRExpr* e, IROp op ) { 5249 return e->tag == Iex_Binop && e->Iex.Binop.op == op; 5250 } 5251 5252 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 5253 { 5254 switch (op) { 5255 case Iop_Or32: 5256 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ 5257 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) 5258 return IRExpr_Unop( Iop_CmpwNEZ32, 5259 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 5260 a2->Iex.Unop.arg ) ); 5261 break; 5262 5263 case Iop_CmpNE32: 5264 /* Since X has type Ity_I1 we can simplify: 5265 CmpNE32(1Uto32(X),0)) ==> X */ 5266 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2)) 5267 return a1->Iex.Unop.arg; 5268 break; 5269 5270 default: 5271 break; 5272 } 5273 /* no reduction rule applies */ 5274 return IRExpr_Binop( op, a1, a2 ); 5275 } 5276 5277 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) 5278 { 5279 switch (op) { 5280 case Iop_CmpwNEZ64: 5281 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */ 5282 if (is_Unop(aa, Iop_CmpwNEZ64)) 5283 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg ); 5284 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5285 if (is_Binop(aa, Iop_Or64) 5286 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) 5287 return fold_IRExpr_Unop( 5288 Iop_CmpwNEZ64, 5289 IRExpr_Binop(Iop_Or64, 5290 aa->Iex.Binop.arg1->Iex.Unop.arg, 5291 aa->Iex.Binop.arg2)); 5292 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ 5293 if (is_Binop(aa, Iop_Or64) 5294 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) 5295 return fold_IRExpr_Unop( 5296 Iop_CmpwNEZ64, 5297 IRExpr_Binop(Iop_Or64, 5298 aa->Iex.Binop.arg1, 5299 aa->Iex.Binop.arg2->Iex.Unop.arg)); 5300 break; 5301 case Iop_CmpNEZ64: 5302 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ 5303 if (is_Unop(aa, Iop_Left64)) 5304 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); 5305 /* CmpNEZ64( 1Uto64(X) ) --> X */ 5306 if (is_Unop(aa, Iop_1Uto64)) 5307 return aa->Iex.Unop.arg; 5308 break; 5309 case Iop_CmpwNEZ32: 5310 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ 5311 if (is_Unop(aa, Iop_CmpwNEZ32)) 5312 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); 5313 break; 5314 case Iop_CmpNEZ32: 5315 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ 5316 if (is_Unop(aa, Iop_Left32)) 5317 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); 5318 /* CmpNEZ32( 1Uto32(X) ) --> X */ 5319 if (is_Unop(aa, Iop_1Uto32)) 5320 return aa->Iex.Unop.arg; 5321 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */ 5322 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64)) 5323 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg); 5324 break; 5325 case Iop_CmpNEZ8: 5326 /* CmpNEZ8( 1Uto8(X) ) --> X */ 5327 if (is_Unop(aa, Iop_1Uto8)) 5328 return aa->Iex.Unop.arg; 5329 break; 5330 case Iop_Left32: 5331 /* Left32( Left32(x) ) --> Left32(x) */ 5332 if (is_Unop(aa, Iop_Left32)) 5333 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); 5334 break; 5335 case Iop_Left64: 5336 /* Left64( Left64(x) ) --> Left64(x) */ 5337 if (is_Unop(aa, Iop_Left64)) 5338 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg ); 5339 break; 5340 case Iop_ZeroHI64ofV128: 5341 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */ 5342 if (is_Unop(aa, Iop_ZeroHI64ofV128)) 5343 return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg ); 5344 break; 5345 case Iop_32to1: 5346 /* 32to1( 1Uto32 ( x ) ) --> x */ 5347 if (is_Unop(aa, Iop_1Uto32)) 5348 return aa->Iex.Unop.arg; 5349 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ 5350 if (is_Unop(aa, Iop_CmpwNEZ32)) 5351 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg ); 5352 break; 5353 case Iop_64to1: 5354 /* 64to1( 1Uto64 ( x ) ) --> x */ 5355 if (is_Unop(aa, Iop_1Uto64)) 5356 return aa->Iex.Unop.arg; 5357 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ 5358 if (is_Unop(aa, Iop_CmpwNEZ64)) 5359 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); 5360 break; 5361 case Iop_64to32: 5362 /* 64to32( 32Uto64 ( x )) --> x */ 5363 if (is_Unop(aa, Iop_32Uto64)) 5364 return aa->Iex.Unop.arg; 5365 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */ 5366 if (is_Unop(aa, Iop_8Uto64)) 5367 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg); 5368 break; 5369 5370 case Iop_32Uto64: 5371 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */ 5372 if (is_Unop(aa, Iop_8Uto32)) 5373 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg); 5374 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */ 5375 if (is_Unop(aa, Iop_16Uto32)) 5376 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg); 5377 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )) 5378 --> Shr64( 32Uto64(64to32(x)), sh )) */ 5379 if (is_Unop(aa, Iop_64to32) 5380 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64) 5381 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5382 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5383 Iop_64to32)) { 5384 return aa->Iex.Unop.arg; 5385 } 5386 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )) 5387 --> 32Uto64(64to32( Shl64( x, sh )) */ 5388 if (is_Unop(aa, Iop_64to32) 5389 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64) 5390 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64) 5391 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg, 5392 Iop_64to32)) { 5393 return 5394 IRExpr_Unop( 5395 Iop_32Uto64, 5396 IRExpr_Unop( 5397 Iop_64to32, 5398 IRExpr_Binop( 5399 Iop_Shl64, 5400 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg, 5401 aa->Iex.Unop.arg->Iex.Binop.arg2 5402 ))); 5403 } 5404 break; 5405 5406 case Iop_1Sto32: 5407 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */ 5408 if (is_Unop(aa, Iop_CmpNEZ8) 5409 && is_Unop(aa->Iex.Unop.arg, Iop_32to8) 5410 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32) 5411 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg, 5412 Iop_CmpNEZ32)) { 5413 return IRExpr_Unop( Iop_CmpwNEZ32, 5414 aa->Iex.Unop.arg->Iex.Unop.arg 5415 ->Iex.Unop.arg->Iex.Unop.arg); 5416 } 5417 break; 5418 5419 default: 5420 break; 5421 } 5422 /* no reduction rule applies */ 5423 return IRExpr_Unop( op, aa ); 5424 } 5425 5426 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) 5427 { 5428 IRExpr* e2; 5429 IRExpr** args2; 5430 Int i; 5431 5432 switch (e->tag) { 5433 5434 case Iex_CCall: 5435 args2 = shallowCopyIRExprVec(e->Iex.CCall.args); 5436 for (i = 0; args2[i]; i++) 5437 args2[i] = atbSubst_Expr(env,args2[i]); 5438 return IRExpr_CCall( 5439 e->Iex.CCall.cee, 5440 e->Iex.CCall.retty, 5441 args2 5442 ); 5443 case Iex_RdTmp: 5444 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp); 5445 return e2 ? e2 : e; 5446 case Iex_ITE: 5447 return IRExpr_ITE( 5448 atbSubst_Expr(env, e->Iex.ITE.cond), 5449 atbSubst_Expr(env, e->Iex.ITE.iftrue), 5450 atbSubst_Expr(env, e->Iex.ITE.iffalse) 5451 ); 5452 case Iex_Qop: 5453 return IRExpr_Qop( 5454 e->Iex.Qop.details->op, 5455 atbSubst_Expr(env, e->Iex.Qop.details->arg1), 5456 atbSubst_Expr(env, e->Iex.Qop.details->arg2), 5457 atbSubst_Expr(env, e->Iex.Qop.details->arg3), 5458 atbSubst_Expr(env, e->Iex.Qop.details->arg4) 5459 ); 5460 case Iex_Triop: 5461 return IRExpr_Triop( 5462 e->Iex.Triop.details->op, 5463 atbSubst_Expr(env, e->Iex.Triop.details->arg1), 5464 atbSubst_Expr(env, e->Iex.Triop.details->arg2), 5465 atbSubst_Expr(env, e->Iex.Triop.details->arg3) 5466 ); 5467 case Iex_Binop: 5468 return fold_IRExpr_Binop( 5469 e->Iex.Binop.op, 5470 atbSubst_Expr(env, e->Iex.Binop.arg1), 5471 atbSubst_Expr(env, e->Iex.Binop.arg2) 5472 ); 5473 case Iex_Unop: 5474 return fold_IRExpr_Unop( 5475 e->Iex.Unop.op, 5476 atbSubst_Expr(env, e->Iex.Unop.arg) 5477 ); 5478 case Iex_Load: 5479 return IRExpr_Load( 5480 e->Iex.Load.end, 5481 e->Iex.Load.ty, 5482 atbSubst_Expr(env, e->Iex.Load.addr) 5483 ); 5484 case Iex_GetI: 5485 return IRExpr_GetI( 5486 e->Iex.GetI.descr, 5487 atbSubst_Expr(env, e->Iex.GetI.ix), 5488 e->Iex.GetI.bias 5489 ); 5490 case Iex_Const: 5491 case Iex_Get: 5492 return e; 5493 default: 5494 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); 5495 vpanic("atbSubst_Expr"); 5496 } 5497 } 5498 5499 /* Same deal as atbSubst_Expr, except for stmts. */ 5500 5501 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) 5502 { 5503 Int i; 5504 IRDirty *d, *d2; 5505 IRCAS *cas, *cas2; 5506 IRPutI *puti, *puti2; 5507 5508 switch (st->tag) { 5509 case Ist_AbiHint: 5510 return IRStmt_AbiHint( 5511 atbSubst_Expr(env, st->Ist.AbiHint.base), 5512 st->Ist.AbiHint.len, 5513 atbSubst_Expr(env, st->Ist.AbiHint.nia) 5514 ); 5515 case Ist_Store: 5516 return IRStmt_Store( 5517 st->Ist.Store.end, 5518 atbSubst_Expr(env, st->Ist.Store.addr), 5519 atbSubst_Expr(env, st->Ist.Store.data) 5520 ); 5521 case Ist_StoreG: { 5522 IRStoreG* sg = st->Ist.StoreG.details; 5523 return IRStmt_StoreG(sg->end, 5524 atbSubst_Expr(env, sg->addr), 5525 atbSubst_Expr(env, sg->data), 5526 atbSubst_Expr(env, sg->guard)); 5527 } 5528 case Ist_LoadG: { 5529 IRLoadG* lg = st->Ist.LoadG.details; 5530 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, 5531 atbSubst_Expr(env, lg->addr), 5532 atbSubst_Expr(env, lg->alt), 5533 atbSubst_Expr(env, lg->guard)); 5534 } 5535 case Ist_WrTmp: 5536 return IRStmt_WrTmp( 5537 st->Ist.WrTmp.tmp, 5538 atbSubst_Expr(env, st->Ist.WrTmp.data) 5539 ); 5540 case Ist_Put: 5541 return IRStmt_Put( 5542 st->Ist.Put.offset, 5543 atbSubst_Expr(env, st->Ist.Put.data) 5544 ); 5545 case Ist_PutI: 5546 puti = st->Ist.PutI.details; 5547 puti2 = mkIRPutI(puti->descr, 5548 atbSubst_Expr(env, puti->ix), 5549 puti->bias, 5550 atbSubst_Expr(env, puti->data)); 5551 return IRStmt_PutI(puti2); 5552 5553 case Ist_Exit: 5554 return IRStmt_Exit( 5555 atbSubst_Expr(env, st->Ist.Exit.guard), 5556 st->Ist.Exit.jk, 5557 st->Ist.Exit.dst, 5558 st->Ist.Exit.offsIP 5559 ); 5560 case Ist_IMark: 5561 return IRStmt_IMark(st->Ist.IMark.addr, 5562 st->Ist.IMark.len, 5563 st->Ist.IMark.delta); 5564 case Ist_NoOp: 5565 return IRStmt_NoOp(); 5566 case Ist_MBE: 5567 return IRStmt_MBE(st->Ist.MBE.event); 5568 case Ist_CAS: 5569 cas = st->Ist.CAS.details; 5570 cas2 = mkIRCAS( 5571 cas->oldHi, cas->oldLo, cas->end, 5572 atbSubst_Expr(env, cas->addr), 5573 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL, 5574 atbSubst_Expr(env, cas->expdLo), 5575 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL, 5576 atbSubst_Expr(env, cas->dataLo) 5577 ); 5578 return IRStmt_CAS(cas2); 5579 case Ist_LLSC: 5580 return IRStmt_LLSC( 5581 st->Ist.LLSC.end, 5582 st->Ist.LLSC.result, 5583 atbSubst_Expr(env, st->Ist.LLSC.addr), 5584 st->Ist.LLSC.storedata 5585 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL 5586 ); 5587 case Ist_Dirty: 5588 d = st->Ist.Dirty.details; 5589 d2 = emptyIRDirty(); 5590 *d2 = *d; 5591 if (d2->mFx != Ifx_None) 5592 d2->mAddr = atbSubst_Expr(env, d2->mAddr); 5593 d2->guard = atbSubst_Expr(env, d2->guard); 5594 for (i = 0; d2->args[i]; i++) { 5595 IRExpr* arg = d2->args[i]; 5596 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 5597 d2->args[i] = atbSubst_Expr(env, arg); 5598 } 5599 return IRStmt_Dirty(d2); 5600 default: 5601 vex_printf("\n"); ppIRStmt(st); vex_printf("\n"); 5602 vpanic("atbSubst_Stmt"); 5603 } 5604 } 5605 5606 inline 5607 static Bool dirty_helper_stores ( const IRDirty *d ) 5608 { 5609 return d->mFx == Ifx_Write || d->mFx == Ifx_Modify; 5610 } 5611 5612 inline 5613 static Interval dirty_helper_puts ( 5614 const IRDirty *d, 5615 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 5616 VexRegisterUpdates pxControl, 5617 /*OUT*/Bool *requiresPreciseMemExns 5618 ) 5619 { 5620 Int i; 5621 Interval interval; 5622 5623 /* Passing the guest state pointer opens the door to modifying the 5624 guest state under the covers. It's not allowed, but let's be 5625 extra conservative and assume the worst. */ 5626 for (i = 0; d->args[i]; i++) { 5627 if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) { 5628 *requiresPreciseMemExns = True; 5629 /* Assume all guest state is written. */ 5630 interval.present = True; 5631 interval.low = 0; 5632 interval.high = 0x7FFFFFFF; 5633 return interval; 5634 } 5635 } 5636 5637 /* Check the side effects on the guest state */ 5638 interval.present = False; 5639 interval.low = interval.high = -1; 5640 *requiresPreciseMemExns = False; 5641 5642 for (i = 0; i < d->nFxState; ++i) { 5643 if (d->fxState[i].fx != Ifx_Read) { 5644 Int offset = d->fxState[i].offset; 5645 Int size = d->fxState[i].size; 5646 Int nRepeats = d->fxState[i].nRepeats; 5647 Int repeatLen = d->fxState[i].repeatLen; 5648 5649 if (preciseMemExnsFn(offset, 5650 offset + nRepeats * repeatLen + size - 1, 5651 pxControl)) { 5652 *requiresPreciseMemExns = True; 5653 } 5654 update_interval(&interval, offset, 5655 offset + nRepeats * repeatLen + size - 1); 5656 } 5657 } 5658 5659 return interval; 5660 } 5661 5662 /* Return an interval if st modifies the guest state. Via 5663 requiresPreciseMemExns return whether or not that modification 5664 requires precise exceptions. */ 5665 static Interval stmt_modifies_guest_state ( 5666 IRSB *bb, const IRStmt *st, 5667 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 5668 VexRegisterUpdates pxControl, 5669 /*OUT*/Bool *requiresPreciseMemExns 5670 ) 5671 { 5672 Interval interval; 5673 5674 switch (st->tag) { 5675 case Ist_Put: { 5676 Int offset = st->Ist.Put.offset; 5677 Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data)); 5678 5679 *requiresPreciseMemExns 5680 = preciseMemExnsFn(offset, offset + size - 1, pxControl); 5681 interval.present = True; 5682 interval.low = offset; 5683 interval.high = offset + size - 1; 5684 return interval; 5685 } 5686 5687 case Ist_PutI: { 5688 IRRegArray *descr = st->Ist.PutI.details->descr; 5689 Int offset = descr->base; 5690 Int size = sizeofIRType(descr->elemTy); 5691 5692 /* We quietly assume here that all segments are contiguous and there 5693 are no holes. This is to avoid a loop. The assumption is conservative 5694 in the sense that we might report that precise memory exceptions are 5695 needed when in fact they are not. */ 5696 *requiresPreciseMemExns 5697 = preciseMemExnsFn(offset, offset + descr->nElems * size - 1, 5698 pxControl); 5699 interval.present = True; 5700 interval.low = offset; 5701 interval.high = offset + descr->nElems * size - 1; 5702 return interval; 5703 } 5704 5705 case Ist_Dirty: 5706 return dirty_helper_puts(st->Ist.Dirty.details, 5707 preciseMemExnsFn, pxControl, 5708 requiresPreciseMemExns); 5709 5710 default: 5711 *requiresPreciseMemExns = False; 5712 interval.present = False; 5713 interval.low = -1; 5714 interval.high = -1; 5715 return interval; 5716 } 5717 } 5718 5719 /* notstatic */ Addr ado_treebuild_BB ( 5720 IRSB* bb, 5721 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 5722 VexRegisterUpdates pxControl 5723 ) 5724 { 5725 Int i, j, k, m; 5726 Bool stmtStores, invalidateMe; 5727 Interval putInterval; 5728 IRStmt* st; 5729 IRStmt* st2; 5730 ATmpInfo env[A_NENV]; 5731 5732 Bool max_ga_known = False; 5733 Addr max_ga = 0; 5734 5735 Int n_tmps = bb->tyenv->types_used; 5736 UShort* uses = LibVEX_Alloc_inline(n_tmps * sizeof(UShort)); 5737 5738 /* Phase 1. Scan forwards in bb, counting use occurrences of each 5739 temp. Also count occurrences in the bb->next field. Take the 5740 opportunity to also find the maximum guest address in the block, 5741 since that will be needed later for deciding when we can safely 5742 elide event checks. */ 5743 5744 for (i = 0; i < n_tmps; i++) 5745 uses[i] = 0; 5746 5747 for (i = 0; i < bb->stmts_used; i++) { 5748 st = bb->stmts[i]; 5749 switch (st->tag) { 5750 case Ist_NoOp: 5751 continue; 5752 case Ist_IMark: { 5753 UInt len = st->Ist.IMark.len; 5754 Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1; 5755 max_ga_known = True; 5756 if (mga > max_ga) 5757 max_ga = mga; 5758 break; 5759 } 5760 default: 5761 break; 5762 } 5763 aoccCount_Stmt( uses, st ); 5764 } 5765 aoccCount_Expr(uses, bb->next ); 5766 5767 # if 0 5768 for (i = 0; i < n_tmps; i++) { 5769 if (uses[i] == 0) 5770 continue; 5771 ppIRTemp( (IRTemp)i ); 5772 vex_printf(" used %d\n", (Int)uses[i] ); 5773 } 5774 # endif 5775 5776 /* Phase 2. Scan forwards in bb. For each statement in turn: 5777 5778 If the env is full, emit the end element. This guarantees 5779 there is at least one free slot in the following. 5780 5781 On seeing 't = E', occ(t)==1, 5782 let E'=env(E) 5783 delete this stmt 5784 add t -> E' to the front of the env 5785 Examine E' and set the hints for E' appropriately 5786 (doesLoad? doesGet?) 5787 5788 On seeing any other stmt, 5789 let stmt' = env(stmt) 5790 remove from env any 't=E' binds invalidated by stmt 5791 emit the invalidated stmts 5792 emit stmt' 5793 compact any holes in env 5794 by sliding entries towards the front 5795 5796 Finally, apply env to bb->next. 5797 */ 5798 5799 for (i = 0; i < A_NENV; i++) { 5800 env[i].bindee = NULL; 5801 env[i].binder = IRTemp_INVALID; 5802 } 5803 5804 /* The stmts in bb are being reordered, and we are guaranteed to 5805 end up with no more than the number we started with. Use i to 5806 be the cursor of the current stmt examined and j <= i to be that 5807 for the current stmt being written. 5808 */ 5809 j = 0; 5810 for (i = 0; i < bb->stmts_used; i++) { 5811 5812 st = bb->stmts[i]; 5813 if (st->tag == Ist_NoOp) 5814 continue; 5815 5816 /* Ensure there's at least one space in the env, by emitting 5817 the oldest binding if necessary. */ 5818 if (env[A_NENV-1].bindee != NULL) { 5819 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder, 5820 env[A_NENV-1].bindee ); 5821 j++; 5822 vassert(j <= i); 5823 env[A_NENV-1].bindee = NULL; 5824 } 5825 5826 /* Consider current stmt. */ 5827 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) { 5828 IRExpr *e, *e2; 5829 5830 /* optional extra: dump dead bindings as we find them. 5831 Removes the need for a prior dead-code removal pass. */ 5832 if (uses[st->Ist.WrTmp.tmp] == 0) { 5833 if (0) vex_printf("DEAD binding\n"); 5834 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5835 } 5836 vassert(uses[st->Ist.WrTmp.tmp] == 1); 5837 5838 /* ok, we have 't = E', occ(t)==1. Do the abovementioned 5839 actions. */ 5840 e = st->Ist.WrTmp.data; 5841 e2 = atbSubst_Expr(env, e); 5842 addToEnvFront(env, st->Ist.WrTmp.tmp, e2); 5843 setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2); 5844 /* don't advance j, as we are deleting this stmt and instead 5845 holding it temporarily in the env. */ 5846 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */ 5847 } 5848 5849 /* we get here for any other kind of statement. */ 5850 /* 'use up' any bindings required by the current statement. */ 5851 st2 = atbSubst_Stmt(env, st); 5852 5853 /* Now, before this stmt, dump any bindings in env that it 5854 invalidates. These need to be dumped in the order in which 5855 they originally entered env -- that means from oldest to 5856 youngest. */ 5857 5858 /* putInterval/stmtStores characterise what the stmt under 5859 consideration does, or might do (sidely safe @ True). */ 5860 5861 Bool putRequiresPreciseMemExns; 5862 putInterval = stmt_modifies_guest_state( 5863 bb, st, preciseMemExnsFn, pxControl, 5864 &putRequiresPreciseMemExns 5865 ); 5866 5867 /* be True if this stmt writes memory or might do (==> we don't 5868 want to reorder other loads or stores relative to it). Also, 5869 both LL and SC fall under this classification, since we 5870 really ought to be conservative and not reorder any other 5871 memory transactions relative to them. */ 5872 stmtStores 5873 = toBool( st->tag == Ist_Store 5874 || (st->tag == Ist_Dirty 5875 && dirty_helper_stores(st->Ist.Dirty.details)) 5876 || st->tag == Ist_LLSC 5877 || st->tag == Ist_CAS ); 5878 5879 for (k = A_NENV-1; k >= 0; k--) { 5880 if (env[k].bindee == NULL) 5881 continue; 5882 /* Compare the actions of this stmt with the actions of 5883 binding 'k', to see if they invalidate the binding. */ 5884 invalidateMe 5885 = toBool( 5886 /* a store invalidates loaded data */ 5887 (env[k].doesLoad && stmtStores) 5888 /* a put invalidates get'd data, if they overlap */ 5889 || ((env[k].getInterval.present && putInterval.present) && 5890 intervals_overlap(env[k].getInterval, putInterval)) 5891 /* a put invalidates loaded data. That means, in essense, that 5892 a load expression cannot be substituted into a statement 5893 that follows the put. But there is nothing wrong doing so 5894 except when the put statement requries precise exceptions. 5895 Think of a load that is moved past a put where the put 5896 updates the IP in the guest state. If the load generates 5897 a segfault, the wrong address (line number) would be 5898 reported. */ 5899 || (env[k].doesLoad && putInterval.present && 5900 putRequiresPreciseMemExns) 5901 /* probably overly conservative: a memory bus event 5902 invalidates absolutely everything, so that all 5903 computation prior to it is forced to complete before 5904 proceeding with the event (fence,lock,unlock). */ 5905 || st->tag == Ist_MBE 5906 /* also be (probably overly) paranoid re AbiHints */ 5907 || st->tag == Ist_AbiHint 5908 ); 5909 if (invalidateMe) { 5910 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee ); 5911 j++; 5912 vassert(j <= i); 5913 env[k].bindee = NULL; 5914 } 5915 } 5916 5917 /* Slide in-use entries in env up to the front */ 5918 m = 0; 5919 for (k = 0; k < A_NENV; k++) { 5920 if (env[k].bindee != NULL) { 5921 env[m] = env[k]; 5922 m++; 5923 } 5924 } 5925 for (m = m; m < A_NENV; m++) { 5926 env[m].bindee = NULL; 5927 } 5928 5929 /* finally, emit the substituted statement */ 5930 bb->stmts[j] = st2; 5931 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */ 5932 j++; 5933 5934 vassert(j <= i+1); 5935 } /* for each stmt in the original bb ... */ 5936 5937 /* Finally ... substitute the ->next field as much as possible, and 5938 dump any left-over bindings. Hmm. Perhaps there should be no 5939 left over bindings? Or any left-over bindings are 5940 by definition dead? */ 5941 bb->next = atbSubst_Expr(env, bb->next); 5942 bb->stmts_used = j; 5943 5944 return max_ga_known ? max_ga : ~(Addr)0; 5945 } 5946 5947 5948 /*---------------------------------------------------------------*/ 5949 /*--- MSVC specific transformation hacks ---*/ 5950 /*---------------------------------------------------------------*/ 5951 5952 /* The purpose of all this is to find MSVC's idiom for non-constant 5953 bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into 5954 gcc's idiom "(a & ~c) | (b & c)". Motivation is that Memcheck has 5955 generates a lot of false positives from the MSVC version because it 5956 doesn't understand that XORing an undefined bit with itself gives a 5957 defined result. 5958 5959 This isn't a problem for the simple case "x ^ x", because iropt 5960 folds it to a constant zero before Memcheck ever sees it. But in 5961 this case we have an intervening "& c" which defeats the simple 5962 case. So we have to carefully inspect all expressions rooted at an 5963 XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the 5964 7 other variants resulting from swapping the order of arguments to 5965 the three binary operations. If we get a match, we then replace 5966 the tree with "(a & ~c) | (b & c)", and Memcheck is happy. 5967 5968 The key difficulty is to spot the two uses of "a". To normalise 5969 the IR to maximise the chances of success, we first do a CSE pass, 5970 with CSEing of loads enabled, since the two "a" expressions may be 5971 loads, which need to be commoned up. Then we do a constant folding 5972 pass, so as to remove any tmp-to-tmp assignment chains that would 5973 make matching the original expression more difficult. 5974 */ 5975 5976 5977 /* Helper function for debug printing */ 5978 __attribute__((unused)) 5979 static void print_flat_expr ( IRExpr** env, IRExpr* e ) 5980 { 5981 if (e == NULL) { 5982 vex_printf("?"); 5983 return; 5984 } 5985 switch (e->tag) { 5986 case Iex_Binop: { 5987 ppIROp(e->Iex.Binop.op); 5988 vex_printf("("); 5989 print_flat_expr(env, e->Iex.Binop.arg1); 5990 vex_printf(","); 5991 print_flat_expr(env, e->Iex.Binop.arg2); 5992 vex_printf(")"); 5993 break; 5994 } 5995 case Iex_Unop: { 5996 ppIROp(e->Iex.Unop.op); 5997 vex_printf("("); 5998 print_flat_expr(env, e->Iex.Unop.arg); 5999 vex_printf(")"); 6000 break; 6001 } 6002 case Iex_RdTmp: 6003 ppIRTemp(e->Iex.RdTmp.tmp); 6004 vex_printf("="); 6005 print_flat_expr(env, chase(env, e)); 6006 break; 6007 case Iex_Const: 6008 case Iex_CCall: 6009 case Iex_Load: 6010 case Iex_ITE: 6011 case Iex_Get: 6012 ppIRExpr(e); 6013 break; 6014 default: 6015 vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n"); 6016 vassert(0); 6017 } 6018 } 6019 6020 /* Spot a ^ ((a ^ b) & c) for a,b and c tmp-or-const (atoms) 6021 or any of the other 7 variants generated by switching the order 6022 of arguments to the outer ^, the inner ^ and the &. 6023 */ 6024 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb, 6025 /*OUT*/IRExpr** cc, 6026 IRExpr** env, IRExpr* e, 6027 IROp opAND, IROp opXOR) 6028 { 6029 # define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \ 6030 && (_e)->Iex.Binop.op == (_op)) 6031 # define ISATOM(_e) isIRAtom(_e) 6032 # define STEP(_e) chase1(env, (_e)) 6033 # define LL(_e) ((_e)->Iex.Binop.arg1) 6034 # define RR(_e) ((_e)->Iex.Binop.arg2) 6035 6036 IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR; 6037 6038 /* This is common to all 8 cases */ 6039 if (!ISBIN(e, opXOR)) goto fail; 6040 6041 /* -----and------ */ 6042 /* --xor--- */ 6043 /* find variant 1: a1 ^ ((a2 ^ b) & c) */ 6044 /* find variant 2: a1 ^ ((b ^ a2) & c) */ 6045 a1 = and = xor = c = a2bL = a2bR = NULL; 6046 6047 a1 = LL(e); 6048 and = STEP(RR(e)); 6049 if (!ISBIN(and, opAND)) goto v34; 6050 xor = STEP(LL(and)); 6051 c = RR(and); 6052 if (!ISBIN(xor, opXOR)) goto v34; 6053 a2bL = LL(xor); 6054 a2bR = RR(xor); 6055 6056 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) { 6057 *aa = a1; 6058 *bb = a2bR; 6059 *cc = c; 6060 return 1; 6061 } 6062 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) { 6063 *aa = a1; 6064 *bb = a2bL; 6065 *cc = c; 6066 return 2; 6067 } 6068 6069 v34: 6070 /* -----and------ */ 6071 /* --xor--- */ 6072 /* find variant 3: ((a2 ^ b) & c) ^ a1 */ 6073 /* find variant 4: ((b ^ a2) & c) ^ a1 */ 6074 a1 = and = xor = c = a2bL = a2bR = NULL; 6075 6076 a1 = RR(e); 6077 and = STEP(LL(e)); 6078 if (!ISBIN(and, opAND)) goto v56; 6079 xor = STEP(LL(and)); 6080 c = RR(and); 6081 if (!ISBIN(xor, opXOR)) goto v56; 6082 a2bL = LL(xor); 6083 a2bR = RR(xor); 6084 6085 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) { 6086 *aa = a1; 6087 *bb = a2bR; 6088 *cc = c; 6089 return 3; 6090 } 6091 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) { 6092 *aa = a1; 6093 *bb = a2bL; 6094 *cc = c; 6095 return 4; 6096 } 6097 6098 v56: 6099 /* -----and------ */ 6100 /* --xor--- */ 6101 /* find variant 5: a1 ^ (c & (a2 ^ b)) */ 6102 /* find variant 6: a1 ^ (c & (b ^ a2)) */ 6103 a1 = and = xor = c = a2bL = a2bR = NULL; 6104 6105 a1 = LL(e); 6106 and = STEP(RR(e)); 6107 if (!ISBIN(and, opAND)) goto v78; 6108 xor = STEP(RR(and)); 6109 c = LL(and); 6110 if (!ISBIN(xor, opXOR)) goto v78; 6111 a2bL = LL(xor); 6112 a2bR = RR(xor); 6113 6114 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) { 6115 *aa = a1; 6116 *bb = a2bR; 6117 *cc = c; 6118 vassert(5-5); // ATC 6119 return 5; 6120 } 6121 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) { 6122 *aa = a1; 6123 *bb = a2bL; 6124 *cc = c; 6125 vassert(6-6); // ATC 6126 return 6; 6127 } 6128 6129 v78: 6130 /* -----and------ */ 6131 /* --xor--- */ 6132 /* find variant 7: (c & (a2 ^ b)) ^ a1 */ 6133 /* find variant 8: (c & (b ^ a2)) ^ a1 */ 6134 a1 = and = xor = c = a2bL = a2bR = NULL; 6135 6136 a1 = RR(e); 6137 and = STEP(LL(e)); 6138 if (!ISBIN(and, opAND)) goto fail; 6139 xor = STEP(RR(and)); 6140 c = LL(and); 6141 if (!ISBIN(xor, opXOR)) goto fail; 6142 a2bL = LL(xor); 6143 a2bR = RR(xor); 6144 6145 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) { 6146 *aa = a1; 6147 *bb = a2bR; 6148 *cc = c; 6149 return 7; 6150 } 6151 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) { 6152 *aa = a1; 6153 *bb = a2bL; 6154 *cc = c; 6155 return 8; 6156 } 6157 6158 fail: 6159 return 0; 6160 6161 # undef ISBIN 6162 # undef ISATOM 6163 # undef STEP 6164 # undef LL 6165 # undef RR 6166 } 6167 6168 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other 6169 variants thereof generated by switching arguments around), return 6170 the IRExpr* for (a & ~c) | (b & c). Else return NULL. */ 6171 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e ) 6172 { 6173 if (e->tag != Iex_Binop) 6174 return NULL; 6175 6176 const HChar* tyNm = NULL; 6177 IROp opOR = Iop_INVALID; 6178 IROp opAND = Iop_INVALID; 6179 IROp opNOT = Iop_INVALID; 6180 IROp opXOR = Iop_INVALID; 6181 switch (e->Iex.Binop.op) { 6182 case Iop_Xor32: 6183 tyNm = "I32"; 6184 opOR = Iop_Or32; opAND = Iop_And32; 6185 opNOT = Iop_Not32; opXOR = Iop_Xor32; 6186 break; 6187 case Iop_Xor16: 6188 tyNm = "I16"; 6189 opOR = Iop_Or16; opAND = Iop_And16; 6190 opNOT = Iop_Not16; opXOR = Iop_Xor16; 6191 break; 6192 case Iop_Xor8: 6193 tyNm = "I8"; 6194 opOR = Iop_Or8; opAND = Iop_And8; 6195 opNOT = Iop_Not8; opXOR = Iop_Xor8; 6196 break; 6197 default: 6198 return NULL; 6199 } 6200 6201 IRExpr* aa = NULL; 6202 IRExpr* bb = NULL; 6203 IRExpr* cc = NULL; 6204 UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR); 6205 if (variant > 0) { 6206 static UInt ctr = 0; 6207 if (0) 6208 vex_printf("XXXXXXXXXX Bitfield Assignment number %u, " 6209 "type %s, variant %u\n", 6210 ++ctr, tyNm, variant); 6211 /* It's vitally important that the returned aa, bb and cc are 6212 atoms -- either constants or tmps. If it's anything else 6213 (eg, a GET) then incorporating them in a tree at this point 6214 in the SB may erroneously pull them forwards (eg of a PUT 6215 that originally was after the GET) and so transform the IR 6216 wrongly. spotBitfieldAssignment should guarantee only to 6217 give us atoms, but we check here anyway. */ 6218 vassert(aa && isIRAtom(aa)); 6219 vassert(bb && isIRAtom(bb)); 6220 vassert(cc && isIRAtom(cc)); 6221 return IRExpr_Binop( 6222 opOR, 6223 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)), 6224 IRExpr_Binop(opAND, bb, cc) 6225 ); 6226 } 6227 return NULL; 6228 } 6229 6230 6231 /* SB is modified in-place. Visit all the IRExprs and, for those 6232 which are allowed to be non-atomic, perform the XOR transform if 6233 possible. This makes |sb| be non-flat, but that's ok, the caller 6234 can re-flatten it. Returns True iff any changes were made. */ 6235 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb ) 6236 { 6237 Int i; 6238 Bool changed = False; 6239 6240 /* Make the tmp->expr environment, so we can use it for 6241 chasing expressions. */ 6242 Int n_tmps = sb->tyenv->types_used; 6243 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*)); 6244 for (i = 0; i < n_tmps; i++) 6245 env[i] = NULL; 6246 6247 for (i = 0; i < sb->stmts_used; i++) { 6248 IRStmt* st = sb->stmts[i]; 6249 if (st->tag != Ist_WrTmp) 6250 continue; 6251 IRTemp t = st->Ist.WrTmp.tmp; 6252 vassert(t >= 0 && t < n_tmps); 6253 env[t] = st->Ist.WrTmp.data; 6254 } 6255 6256 for (i = 0; i < sb->stmts_used; i++) { 6257 IRStmt* st = sb->stmts[i]; 6258 6259 switch (st->tag) { 6260 case Ist_AbiHint: 6261 vassert(isIRAtom(st->Ist.AbiHint.base)); 6262 vassert(isIRAtom(st->Ist.AbiHint.nia)); 6263 break; 6264 case Ist_Put: 6265 vassert(isIRAtom(st->Ist.Put.data)); 6266 break; 6267 case Ist_PutI: { 6268 IRPutI* puti = st->Ist.PutI.details; 6269 vassert(isIRAtom(puti->ix)); 6270 vassert(isIRAtom(puti->data)); 6271 break; 6272 } 6273 case Ist_WrTmp: { 6274 /* This is the one place where an expr (st->Ist.WrTmp.data) is 6275 allowed to be more than just a constant or a tmp. */ 6276 IRExpr* mb_new_data 6277 = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data); 6278 if (mb_new_data) { 6279 //ppIRSB(sb); 6280 st->Ist.WrTmp.data = mb_new_data; 6281 //ppIRSB(sb); 6282 changed = True; 6283 } 6284 break; 6285 } 6286 case Ist_Store: 6287 vassert(isIRAtom(st->Ist.Store.addr)); 6288 vassert(isIRAtom(st->Ist.Store.data)); 6289 break; 6290 case Ist_StoreG: { 6291 IRStoreG* sg = st->Ist.StoreG.details; 6292 vassert(isIRAtom(sg->addr)); 6293 vassert(isIRAtom(sg->data)); 6294 vassert(isIRAtom(sg->guard)); 6295 break; 6296 } 6297 case Ist_LoadG: { 6298 IRLoadG* lg = st->Ist.LoadG.details; 6299 vassert(isIRAtom(lg->addr)); 6300 vassert(isIRAtom(lg->alt)); 6301 vassert(isIRAtom(lg->guard)); 6302 break; 6303 } 6304 case Ist_CAS: { 6305 IRCAS* cas = st->Ist.CAS.details; 6306 vassert(isIRAtom(cas->addr)); 6307 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 6308 vassert(isIRAtom(cas->expdLo)); 6309 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 6310 vassert(isIRAtom(cas->dataLo)); 6311 break; 6312 } 6313 case Ist_LLSC: 6314 vassert(isIRAtom(st->Ist.LLSC.addr)); 6315 if (st->Ist.LLSC.storedata) 6316 vassert(isIRAtom(st->Ist.LLSC.storedata)); 6317 break; 6318 case Ist_Dirty: { 6319 IRDirty* d = st->Ist.Dirty.details; 6320 if (d->mFx != Ifx_None) { 6321 vassert(isIRAtom(d->mAddr)); 6322 } 6323 vassert(isIRAtom(d->guard)); 6324 for (Int j = 0; d->args[j]; j++) { 6325 IRExpr* arg = d->args[j]; 6326 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) { 6327 vassert(isIRAtom(arg)); 6328 } 6329 } 6330 break; 6331 } 6332 case Ist_IMark: 6333 case Ist_NoOp: 6334 case Ist_MBE: 6335 break; 6336 case Ist_Exit: 6337 vassert(isIRAtom(st->Ist.Exit.guard)); 6338 break; 6339 default: 6340 vex_printf("\n"); ppIRStmt(st); 6341 vpanic("do_XOR_TRANSFORMS_IRSB"); 6342 } 6343 } 6344 6345 vassert(isIRAtom(sb->next)); 6346 return changed; 6347 } 6348 6349 6350 static IRSB* do_MSVC_HACKS ( IRSB* sb ) 6351 { 6352 // Normalise as much as we can. This is the one-and-only place 6353 // where we call do_cse_BB with allowLoadsToBeCSEd set to True. 6354 Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ ); 6355 if (any_cse_changes) { 6356 // CSEing might have created dead code. Remove it. 6357 sb = cprop_BB ( sb ); 6358 do_deadcode_BB(sb); 6359 } 6360 6361 // Visit all atoms, do the transformation proper. bb is modified 6362 // in-place. 6363 Bool changed = do_XOR_TRANSFORM_IRSB(sb); 6364 6365 if (changed) { 6366 // The transformation generates non-flat expressions, so we now 6367 // need to re-flatten the block. 6368 sb = flatten_BB(sb); 6369 } 6370 6371 return sb; 6372 } 6373 6374 6375 /*---------------------------------------------------------------*/ 6376 /*--- iropt main ---*/ 6377 /*---------------------------------------------------------------*/ 6378 6379 static Bool iropt_verbose = False; /* True; */ 6380 6381 6382 /* Do a simple cleanup pass on bb. This is: redundant Get removal, 6383 redundant Put removal, constant propagation, dead code removal, 6384 clean helper specialisation, and dead code removal (again). 6385 */ 6386 6387 6388 static 6389 IRSB* cheap_transformations ( 6390 IRSB* bb, 6391 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int), 6392 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 6393 VexRegisterUpdates pxControl 6394 ) 6395 { 6396 redundant_get_removal_BB ( bb ); 6397 if (iropt_verbose) { 6398 vex_printf("\n========= REDUNDANT GET\n\n" ); 6399 ppIRSB(bb); 6400 } 6401 6402 if (pxControl < VexRegUpdAllregsAtEachInsn) { 6403 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl ); 6404 } 6405 if (iropt_verbose) { 6406 vex_printf("\n========= REDUNDANT PUT\n\n" ); 6407 ppIRSB(bb); 6408 } 6409 6410 bb = cprop_BB ( bb ); 6411 if (iropt_verbose) { 6412 vex_printf("\n========= CPROPD\n\n" ); 6413 ppIRSB(bb); 6414 } 6415 6416 do_deadcode_BB ( bb ); 6417 if (iropt_verbose) { 6418 vex_printf("\n========= DEAD\n\n" ); 6419 ppIRSB(bb); 6420 } 6421 6422 bb = spec_helpers_BB ( bb, specHelper ); 6423 do_deadcode_BB ( bb ); 6424 if (iropt_verbose) { 6425 vex_printf("\n========= SPECd \n\n" ); 6426 ppIRSB(bb); 6427 } 6428 6429 return bb; 6430 } 6431 6432 6433 /* Do some more expensive transformations on bb, which are aimed at 6434 optimising as much as possible in the presence of GetI and PutI. */ 6435 6436 static 6437 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl ) 6438 { 6439 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ ); 6440 collapse_AddSub_chains_BB( bb ); 6441 do_redundant_GetI_elimination( bb ); 6442 if (pxControl < VexRegUpdAllregsAtEachInsn) { 6443 do_redundant_PutI_elimination( bb, pxControl ); 6444 } 6445 do_deadcode_BB( bb ); 6446 return bb; 6447 } 6448 6449 6450 /* Scan a flattened BB to look for signs that more expensive 6451 optimisations might be useful: 6452 - find out if there are any GetIs and PutIs 6453 - find out if there are any floating or vector-typed temporaries 6454 */ 6455 6456 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI, 6457 /*OUT*/Bool* hasVorFtemps, 6458 IRSB* bb ) 6459 { 6460 Int i, j; 6461 IRStmt* st; 6462 IRDirty* d; 6463 IRCAS* cas; 6464 6465 *hasGetIorPutI = False; 6466 *hasVorFtemps = False; 6467 6468 for (i = 0; i < bb->stmts_used; i++) { 6469 st = bb->stmts[i]; 6470 switch (st->tag) { 6471 case Ist_AbiHint: 6472 vassert(isIRAtom(st->Ist.AbiHint.base)); 6473 vassert(isIRAtom(st->Ist.AbiHint.nia)); 6474 break; 6475 case Ist_PutI: 6476 *hasGetIorPutI = True; 6477 break; 6478 case Ist_WrTmp: 6479 if (st->Ist.WrTmp.data->tag == Iex_GetI) 6480 *hasGetIorPutI = True; 6481 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) { 6482 case Ity_I1: case Ity_I8: case Ity_I16: 6483 case Ity_I32: case Ity_I64: case Ity_I128: 6484 break; 6485 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128: 6486 case Ity_V128: case Ity_V256: 6487 *hasVorFtemps = True; 6488 break; 6489 case Ity_D32: case Ity_D64: case Ity_D128: 6490 *hasVorFtemps = True; 6491 break; 6492 default: 6493 goto bad; 6494 } 6495 break; 6496 case Ist_Put: 6497 vassert(isIRAtom(st->Ist.Put.data)); 6498 break; 6499 case Ist_Store: 6500 vassert(isIRAtom(st->Ist.Store.addr)); 6501 vassert(isIRAtom(st->Ist.Store.data)); 6502 break; 6503 case Ist_StoreG: { 6504 IRStoreG* sg = st->Ist.StoreG.details; 6505 vassert(isIRAtom(sg->addr)); 6506 vassert(isIRAtom(sg->data)); 6507 vassert(isIRAtom(sg->guard)); 6508 break; 6509 } 6510 case Ist_LoadG: { 6511 IRLoadG* lg = st->Ist.LoadG.details; 6512 vassert(isIRAtom(lg->addr)); 6513 vassert(isIRAtom(lg->alt)); 6514 vassert(isIRAtom(lg->guard)); 6515 break; 6516 } 6517 case Ist_CAS: 6518 cas = st->Ist.CAS.details; 6519 vassert(isIRAtom(cas->addr)); 6520 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi)); 6521 vassert(isIRAtom(cas->expdLo)); 6522 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi)); 6523 vassert(isIRAtom(cas->dataLo)); 6524 break; 6525 case Ist_LLSC: 6526 vassert(isIRAtom(st->Ist.LLSC.addr)); 6527 if (st->Ist.LLSC.storedata) 6528 vassert(isIRAtom(st->Ist.LLSC.storedata)); 6529 break; 6530 case Ist_Dirty: 6531 d = st->Ist.Dirty.details; 6532 vassert(isIRAtom(d->guard)); 6533 for (j = 0; d->args[j]; j++) { 6534 IRExpr* arg = d->args[j]; 6535 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) 6536 vassert(isIRAtom(arg)); 6537 } 6538 if (d->mFx != Ifx_None) 6539 vassert(isIRAtom(d->mAddr)); 6540 break; 6541 case Ist_NoOp: 6542 case Ist_IMark: 6543 case Ist_MBE: 6544 break; 6545 case Ist_Exit: 6546 vassert(isIRAtom(st->Ist.Exit.guard)); 6547 break; 6548 default: 6549 bad: 6550 ppIRStmt(st); 6551 vpanic("considerExpensives"); 6552 } 6553 } 6554 } 6555 6556 6557 /* ---------------- The main iropt entry point. ---------------- */ 6558 6559 /* exported from this file */ 6560 /* Rules of the game: 6561 6562 - IRExpr/IRStmt trees should be treated as immutable, as they 6563 may get shared. So never change a field of such a tree node; 6564 instead construct and return a new one if needed. 6565 */ 6566 6567 6568 IRSB* do_iropt_BB( 6569 IRSB* bb0, 6570 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int), 6571 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), 6572 VexRegisterUpdates pxControl, 6573 Addr guest_addr, 6574 VexArch guest_arch 6575 ) 6576 { 6577 static Int n_total = 0; 6578 static Int n_expensive = 0; 6579 6580 Bool hasGetIorPutI, hasVorFtemps; 6581 IRSB *bb, *bb2; 6582 6583 n_total++; 6584 6585 /* First flatten the block out, since all other 6586 phases assume flat code. */ 6587 6588 bb = flatten_BB ( bb0 ); 6589 6590 if (iropt_verbose) { 6591 vex_printf("\n========= FLAT\n\n" ); 6592 ppIRSB(bb); 6593 } 6594 6595 /* If at level 0, stop now. */ 6596 if (vex_control.iropt_level <= 0) return bb; 6597 6598 /* Now do a preliminary cleanup pass, and figure out if we also 6599 need to do 'expensive' optimisations. Expensive optimisations 6600 are deemed necessary if the block contains any GetIs or PutIs. 6601 If needed, do expensive transformations and then another cheap 6602 cleanup pass. */ 6603 6604 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl ); 6605 6606 if (guest_arch == VexArchARM) { 6607 /* Translating Thumb2 code produces a lot of chaff. We have to 6608 work extra hard to get rid of it. */ 6609 bb = cprop_BB(bb); 6610 bb = spec_helpers_BB ( bb, specHelper ); 6611 if (pxControl < VexRegUpdAllregsAtEachInsn) { 6612 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl ); 6613 } 6614 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ ); 6615 do_deadcode_BB( bb ); 6616 } 6617 6618 if (vex_control.iropt_level > 1) { 6619 6620 /* Peer at what we have, to decide how much more effort to throw 6621 at it. */ 6622 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb ); 6623 6624 if (hasVorFtemps && !hasGetIorPutI) { 6625 /* If any evidence of FP or Vector activity, CSE, as that 6626 tends to mop up all manner of lardy code to do with 6627 rounding modes. Don't bother if hasGetIorPutI since that 6628 case leads into the expensive transformations, which do 6629 CSE anyway. */ 6630 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ ); 6631 do_deadcode_BB( bb ); 6632 } 6633 6634 if (hasGetIorPutI) { 6635 Bool cses; 6636 n_expensive++; 6637 if (DEBUG_IROPT) 6638 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive); 6639 bb = expensive_transformations( bb, pxControl ); 6640 bb = cheap_transformations( bb, specHelper, 6641 preciseMemExnsFn, pxControl ); 6642 /* Potentially common up GetIs */ 6643 cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ ); 6644 if (cses) 6645 bb = cheap_transformations( bb, specHelper, 6646 preciseMemExnsFn, pxControl ); 6647 } 6648 6649 /////////////////////////////////////////////////////////// 6650 // BEGIN MSVC optimised code transformation hacks 6651 if (0) 6652 bb = do_MSVC_HACKS(bb); 6653 // END MSVC optimised code transformation hacks 6654 /////////////////////////////////////////////////////////// 6655 6656 /* Now have a go at unrolling simple (single-BB) loops. If 6657 successful, clean up the results as much as possible. */ 6658 6659 bb2 = maybe_loop_unroll_BB( bb, guest_addr ); 6660 if (bb2) { 6661 bb = cheap_transformations( bb2, specHelper, 6662 preciseMemExnsFn, pxControl ); 6663 if (hasGetIorPutI) { 6664 bb = expensive_transformations( bb, pxControl ); 6665 bb = cheap_transformations( bb, specHelper, 6666 preciseMemExnsFn, pxControl ); 6667 } else { 6668 /* at least do CSE and dead code removal */ 6669 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ ); 6670 do_deadcode_BB( bb ); 6671 } 6672 if (0) vex_printf("vex iropt: unrolled a loop\n"); 6673 } 6674 6675 } 6676 6677 return bb; 6678 } 6679 6680 6681 /*---------------------------------------------------------------*/ 6682 /*--- end ir_opt.c ---*/ 6683 /*---------------------------------------------------------------*/ 6684