1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk (at) ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "config.h" 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 31 #include "qemu-common.h" 32 #include "cpu.h" 33 #include "exec/exec-all.h" 34 #include "tcg-op.h" 35 36 #define CASE_OP_32_64(x) \ 37 glue(glue(case INDEX_op_, x), _i32): \ 38 glue(glue(case INDEX_op_, x), _i64) 39 40 typedef enum { 41 TCG_TEMP_UNDEF = 0, 42 TCG_TEMP_CONST, 43 TCG_TEMP_COPY, 44 } tcg_temp_state; 45 46 struct tcg_temp_info { 47 tcg_temp_state state; 48 uint16_t prev_copy; 49 uint16_t next_copy; 50 tcg_target_ulong val; 51 tcg_target_ulong mask; 52 }; 53 54 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 55 56 /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove 57 the copy flag from the left temp. */ 58 static void reset_temp(TCGArg temp) 59 { 60 if (temps[temp].state == TCG_TEMP_COPY) { 61 if (temps[temp].prev_copy == temps[temp].next_copy) { 62 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; 63 } else { 64 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 65 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 66 } 67 } 68 temps[temp].state = TCG_TEMP_UNDEF; 69 temps[temp].mask = -1; 70 } 71 72 /* Reset all temporaries, given that there are NB_TEMPS of them. */ 73 static void reset_all_temps(int nb_temps) 74 { 75 int i; 76 for (i = 0; i < nb_temps; i++) { 77 temps[i].state = TCG_TEMP_UNDEF; 78 temps[i].mask = -1; 79 } 80 } 81 82 static int op_bits(TCGOpcode op) 83 { 84 const TCGOpDef *def = &tcg_op_defs[op]; 85 return def->flags & TCG_OPF_64BIT ? 64 : 32; 86 } 87 88 static TCGOpcode op_to_movi(TCGOpcode op) 89 { 90 switch (op_bits(op)) { 91 case 32: 92 return INDEX_op_movi_i32; 93 case 64: 94 return INDEX_op_movi_i64; 95 default: 96 fprintf(stderr, "op_to_movi: unexpected return value of " 97 "function op_bits.\n"); 98 tcg_abort(); 99 } 100 } 101 102 static TCGArg find_better_copy(TCGContext *s, TCGArg temp) 103 { 104 TCGArg i; 105 106 /* If this is already a global, we can't do better. */ 107 if (temp < s->nb_globals) { 108 return temp; 109 } 110 111 /* Search for a global first. */ 112 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 113 if (i < s->nb_globals) { 114 return i; 115 } 116 } 117 118 /* If it is a temp, search for a temp local. */ 119 if (!s->temps[temp].temp_local) { 120 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 121 if (s->temps[i].temp_local) { 122 return i; 123 } 124 } 125 } 126 127 /* Failure to find a better representation, return the same temp. */ 128 return temp; 129 } 130 131 static bool temps_are_copies(TCGArg arg1, TCGArg arg2) 132 { 133 TCGArg i; 134 135 if (arg1 == arg2) { 136 return true; 137 } 138 139 if (temps[arg1].state != TCG_TEMP_COPY 140 || temps[arg2].state != TCG_TEMP_COPY) { 141 return false; 142 } 143 144 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { 145 if (i == arg2) { 146 return true; 147 } 148 } 149 150 return false; 151 } 152 153 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, 154 TCGArg dst, TCGArg src) 155 { 156 reset_temp(dst); 157 temps[dst].mask = temps[src].mask; 158 assert(temps[src].state != TCG_TEMP_CONST); 159 160 if (s->temps[src].type == s->temps[dst].type) { 161 if (temps[src].state != TCG_TEMP_COPY) { 162 temps[src].state = TCG_TEMP_COPY; 163 temps[src].next_copy = src; 164 temps[src].prev_copy = src; 165 } 166 temps[dst].state = TCG_TEMP_COPY; 167 temps[dst].next_copy = temps[src].next_copy; 168 temps[dst].prev_copy = src; 169 temps[temps[dst].next_copy].prev_copy = dst; 170 temps[src].next_copy = dst; 171 } 172 173 gen_args[0] = dst; 174 gen_args[1] = src; 175 } 176 177 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val) 178 { 179 reset_temp(dst); 180 temps[dst].state = TCG_TEMP_CONST; 181 temps[dst].val = val; 182 temps[dst].mask = val; 183 gen_args[0] = dst; 184 gen_args[1] = val; 185 } 186 187 static TCGOpcode op_to_mov(TCGOpcode op) 188 { 189 switch (op_bits(op)) { 190 case 32: 191 return INDEX_op_mov_i32; 192 case 64: 193 return INDEX_op_mov_i64; 194 default: 195 fprintf(stderr, "op_to_mov: unexpected return value of " 196 "function op_bits.\n"); 197 tcg_abort(); 198 } 199 } 200 201 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 202 { 203 uint64_t l64, h64; 204 205 switch (op) { 206 CASE_OP_32_64(add): 207 return x + y; 208 209 CASE_OP_32_64(sub): 210 return x - y; 211 212 CASE_OP_32_64(mul): 213 return x * y; 214 215 CASE_OP_32_64(and): 216 return x & y; 217 218 CASE_OP_32_64(or): 219 return x | y; 220 221 CASE_OP_32_64(xor): 222 return x ^ y; 223 224 case INDEX_op_shl_i32: 225 return (uint32_t)x << (uint32_t)y; 226 227 case INDEX_op_shl_i64: 228 return (uint64_t)x << (uint64_t)y; 229 230 case INDEX_op_shr_i32: 231 return (uint32_t)x >> (uint32_t)y; 232 233 case INDEX_op_shr_i64: 234 return (uint64_t)x >> (uint64_t)y; 235 236 case INDEX_op_sar_i32: 237 return (int32_t)x >> (int32_t)y; 238 239 case INDEX_op_sar_i64: 240 return (int64_t)x >> (int64_t)y; 241 242 case INDEX_op_rotr_i32: 243 return ror32(x, y); 244 245 case INDEX_op_rotr_i64: 246 return ror64(x, y); 247 248 case INDEX_op_rotl_i32: 249 return rol32(x, y); 250 251 case INDEX_op_rotl_i64: 252 return rol64(x, y); 253 254 CASE_OP_32_64(not): 255 return ~x; 256 257 CASE_OP_32_64(neg): 258 return -x; 259 260 CASE_OP_32_64(andc): 261 return x & ~y; 262 263 CASE_OP_32_64(orc): 264 return x | ~y; 265 266 CASE_OP_32_64(eqv): 267 return ~(x ^ y); 268 269 CASE_OP_32_64(nand): 270 return ~(x & y); 271 272 CASE_OP_32_64(nor): 273 return ~(x | y); 274 275 CASE_OP_32_64(ext8s): 276 return (int8_t)x; 277 278 CASE_OP_32_64(ext16s): 279 return (int16_t)x; 280 281 CASE_OP_32_64(ext8u): 282 return (uint8_t)x; 283 284 CASE_OP_32_64(ext16u): 285 return (uint16_t)x; 286 287 case INDEX_op_ext32s_i64: 288 return (int32_t)x; 289 290 case INDEX_op_ext32u_i64: 291 return (uint32_t)x; 292 293 case INDEX_op_muluh_i32: 294 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 295 case INDEX_op_mulsh_i32: 296 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 297 298 case INDEX_op_muluh_i64: 299 mulu64(&l64, &h64, x, y); 300 return h64; 301 case INDEX_op_mulsh_i64: 302 muls64(&l64, &h64, x, y); 303 return h64; 304 305 case INDEX_op_div_i32: 306 /* Avoid crashing on divide by zero, otherwise undefined. */ 307 return (int32_t)x / ((int32_t)y ? : 1); 308 case INDEX_op_divu_i32: 309 return (uint32_t)x / ((uint32_t)y ? : 1); 310 case INDEX_op_div_i64: 311 return (int64_t)x / ((int64_t)y ? : 1); 312 case INDEX_op_divu_i64: 313 return (uint64_t)x / ((uint64_t)y ? : 1); 314 315 case INDEX_op_rem_i32: 316 return (int32_t)x % ((int32_t)y ? : 1); 317 case INDEX_op_remu_i32: 318 return (uint32_t)x % ((uint32_t)y ? : 1); 319 case INDEX_op_rem_i64: 320 return (int64_t)x % ((int64_t)y ? : 1); 321 case INDEX_op_remu_i64: 322 return (uint64_t)x % ((uint64_t)y ? : 1); 323 324 default: 325 fprintf(stderr, 326 "Unrecognized operation %d in do_constant_folding.\n", op); 327 tcg_abort(); 328 } 329 } 330 331 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 332 { 333 TCGArg res = do_constant_folding_2(op, x, y); 334 if (op_bits(op) == 32) { 335 res &= 0xffffffff; 336 } 337 return res; 338 } 339 340 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 341 { 342 switch (c) { 343 case TCG_COND_EQ: 344 return x == y; 345 case TCG_COND_NE: 346 return x != y; 347 case TCG_COND_LT: 348 return (int32_t)x < (int32_t)y; 349 case TCG_COND_GE: 350 return (int32_t)x >= (int32_t)y; 351 case TCG_COND_LE: 352 return (int32_t)x <= (int32_t)y; 353 case TCG_COND_GT: 354 return (int32_t)x > (int32_t)y; 355 case TCG_COND_LTU: 356 return x < y; 357 case TCG_COND_GEU: 358 return x >= y; 359 case TCG_COND_LEU: 360 return x <= y; 361 case TCG_COND_GTU: 362 return x > y; 363 default: 364 tcg_abort(); 365 } 366 } 367 368 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 369 { 370 switch (c) { 371 case TCG_COND_EQ: 372 return x == y; 373 case TCG_COND_NE: 374 return x != y; 375 case TCG_COND_LT: 376 return (int64_t)x < (int64_t)y; 377 case TCG_COND_GE: 378 return (int64_t)x >= (int64_t)y; 379 case TCG_COND_LE: 380 return (int64_t)x <= (int64_t)y; 381 case TCG_COND_GT: 382 return (int64_t)x > (int64_t)y; 383 case TCG_COND_LTU: 384 return x < y; 385 case TCG_COND_GEU: 386 return x >= y; 387 case TCG_COND_LEU: 388 return x <= y; 389 case TCG_COND_GTU: 390 return x > y; 391 default: 392 tcg_abort(); 393 } 394 } 395 396 static bool do_constant_folding_cond_eq(TCGCond c) 397 { 398 switch (c) { 399 case TCG_COND_GT: 400 case TCG_COND_LTU: 401 case TCG_COND_LT: 402 case TCG_COND_GTU: 403 case TCG_COND_NE: 404 return 0; 405 case TCG_COND_GE: 406 case TCG_COND_GEU: 407 case TCG_COND_LE: 408 case TCG_COND_LEU: 409 case TCG_COND_EQ: 410 return 1; 411 default: 412 tcg_abort(); 413 } 414 } 415 416 /* Return 2 if the condition can't be simplified, and the result 417 of the condition (0 or 1) if it can */ 418 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, 419 TCGArg y, TCGCond c) 420 { 421 if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { 422 switch (op_bits(op)) { 423 case 32: 424 return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); 425 case 64: 426 return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); 427 default: 428 tcg_abort(); 429 } 430 } else if (temps_are_copies(x, y)) { 431 return do_constant_folding_cond_eq(c); 432 } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { 433 switch (c) { 434 case TCG_COND_LTU: 435 return 0; 436 case TCG_COND_GEU: 437 return 1; 438 default: 439 return 2; 440 } 441 } else { 442 return 2; 443 } 444 } 445 446 /* Return 2 if the condition can't be simplified, and the result 447 of the condition (0 or 1) if it can */ 448 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 449 { 450 TCGArg al = p1[0], ah = p1[1]; 451 TCGArg bl = p2[0], bh = p2[1]; 452 453 if (temps[bl].state == TCG_TEMP_CONST 454 && temps[bh].state == TCG_TEMP_CONST) { 455 uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; 456 457 if (temps[al].state == TCG_TEMP_CONST 458 && temps[ah].state == TCG_TEMP_CONST) { 459 uint64_t a; 460 a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; 461 return do_constant_folding_cond_64(a, b, c); 462 } 463 if (b == 0) { 464 switch (c) { 465 case TCG_COND_LTU: 466 return 0; 467 case TCG_COND_GEU: 468 return 1; 469 default: 470 break; 471 } 472 } 473 } 474 if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { 475 return do_constant_folding_cond_eq(c); 476 } 477 return 2; 478 } 479 480 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 481 { 482 TCGArg a1 = *p1, a2 = *p2; 483 int sum = 0; 484 sum += temps[a1].state == TCG_TEMP_CONST; 485 sum -= temps[a2].state == TCG_TEMP_CONST; 486 487 /* Prefer the constant in second argument, and then the form 488 op a, a, b, which is better handled on non-RISC hosts. */ 489 if (sum > 0 || (sum == 0 && dest == a2)) { 490 *p1 = a2; 491 *p2 = a1; 492 return true; 493 } 494 return false; 495 } 496 497 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 498 { 499 int sum = 0; 500 sum += temps[p1[0]].state == TCG_TEMP_CONST; 501 sum += temps[p1[1]].state == TCG_TEMP_CONST; 502 sum -= temps[p2[0]].state == TCG_TEMP_CONST; 503 sum -= temps[p2[1]].state == TCG_TEMP_CONST; 504 if (sum > 0) { 505 TCGArg t; 506 t = p1[0], p1[0] = p2[0], p2[0] = t; 507 t = p1[1], p1[1] = p2[1], p2[1] = t; 508 return true; 509 } 510 return false; 511 } 512 513 /* Propagate constants and copies, fold constant expressions. */ 514 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, 515 TCGArg *args, TCGOpDef *tcg_op_defs) 516 { 517 int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; 518 tcg_target_ulong mask, affected; 519 TCGOpcode op; 520 const TCGOpDef *def; 521 TCGArg *gen_args; 522 TCGArg tmp; 523 524 /* Array VALS has an element for each temp. 525 If this temp holds a constant then its value is kept in VALS' element. 526 If this temp is a copy of other ones then the other copies are 527 available through the doubly linked circular list. */ 528 529 nb_temps = s->nb_temps; 530 nb_globals = s->nb_globals; 531 reset_all_temps(nb_temps); 532 533 nb_ops = tcg_opc_ptr - s->gen_opc_buf; 534 gen_args = args; 535 for (op_index = 0; op_index < nb_ops; op_index++) { 536 op = s->gen_opc_buf[op_index]; 537 def = &tcg_op_defs[op]; 538 /* Do copy propagation */ 539 if (op == INDEX_op_call) { 540 int nb_oargs = args[0] >> 16; 541 int nb_iargs = args[0] & 0xffff; 542 for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) { 543 if (temps[args[i]].state == TCG_TEMP_COPY) { 544 args[i] = find_better_copy(s, args[i]); 545 } 546 } 547 } else { 548 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { 549 if (temps[args[i]].state == TCG_TEMP_COPY) { 550 args[i] = find_better_copy(s, args[i]); 551 } 552 } 553 } 554 555 /* For commutative operations make constant second argument */ 556 switch (op) { 557 CASE_OP_32_64(add): 558 CASE_OP_32_64(mul): 559 CASE_OP_32_64(and): 560 CASE_OP_32_64(or): 561 CASE_OP_32_64(xor): 562 CASE_OP_32_64(eqv): 563 CASE_OP_32_64(nand): 564 CASE_OP_32_64(nor): 565 CASE_OP_32_64(muluh): 566 CASE_OP_32_64(mulsh): 567 swap_commutative(args[0], &args[1], &args[2]); 568 break; 569 CASE_OP_32_64(brcond): 570 if (swap_commutative(-1, &args[0], &args[1])) { 571 args[2] = tcg_swap_cond(args[2]); 572 } 573 break; 574 CASE_OP_32_64(setcond): 575 if (swap_commutative(args[0], &args[1], &args[2])) { 576 args[3] = tcg_swap_cond(args[3]); 577 } 578 break; 579 CASE_OP_32_64(movcond): 580 if (swap_commutative(-1, &args[1], &args[2])) { 581 args[5] = tcg_swap_cond(args[5]); 582 } 583 /* For movcond, we canonicalize the "false" input reg to match 584 the destination reg so that the tcg backend can implement 585 a "move if true" operation. */ 586 if (swap_commutative(args[0], &args[4], &args[3])) { 587 args[5] = tcg_invert_cond(args[5]); 588 } 589 break; 590 CASE_OP_32_64(add2): 591 swap_commutative(args[0], &args[2], &args[4]); 592 swap_commutative(args[1], &args[3], &args[5]); 593 break; 594 CASE_OP_32_64(mulu2): 595 CASE_OP_32_64(muls2): 596 swap_commutative(args[0], &args[2], &args[3]); 597 break; 598 case INDEX_op_brcond2_i32: 599 if (swap_commutative2(&args[0], &args[2])) { 600 args[4] = tcg_swap_cond(args[4]); 601 } 602 break; 603 case INDEX_op_setcond2_i32: 604 if (swap_commutative2(&args[1], &args[3])) { 605 args[5] = tcg_swap_cond(args[5]); 606 } 607 break; 608 default: 609 break; 610 } 611 612 /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", 613 and "sub r, 0, a => neg r, a" case. */ 614 switch (op) { 615 CASE_OP_32_64(shl): 616 CASE_OP_32_64(shr): 617 CASE_OP_32_64(sar): 618 CASE_OP_32_64(rotl): 619 CASE_OP_32_64(rotr): 620 if (temps[args[1]].state == TCG_TEMP_CONST 621 && temps[args[1]].val == 0) { 622 s->gen_opc_buf[op_index] = op_to_movi(op); 623 tcg_opt_gen_movi(gen_args, args[0], 0); 624 args += 3; 625 gen_args += 2; 626 continue; 627 } 628 break; 629 CASE_OP_32_64(sub): 630 { 631 TCGOpcode neg_op; 632 bool have_neg; 633 634 if (temps[args[2]].state == TCG_TEMP_CONST) { 635 /* Proceed with possible constant folding. */ 636 break; 637 } 638 if (op == INDEX_op_sub_i32) { 639 neg_op = INDEX_op_neg_i32; 640 have_neg = TCG_TARGET_HAS_neg_i32; 641 } else { 642 neg_op = INDEX_op_neg_i64; 643 have_neg = TCG_TARGET_HAS_neg_i64; 644 } 645 if (!have_neg) { 646 break; 647 } 648 if (temps[args[1]].state == TCG_TEMP_CONST 649 && temps[args[1]].val == 0) { 650 s->gen_opc_buf[op_index] = neg_op; 651 reset_temp(args[0]); 652 gen_args[0] = args[0]; 653 gen_args[1] = args[2]; 654 args += 3; 655 gen_args += 2; 656 continue; 657 } 658 } 659 break; 660 default: 661 break; 662 } 663 664 /* Simplify expression for "op r, a, 0 => mov r, a" cases */ 665 switch (op) { 666 CASE_OP_32_64(add): 667 CASE_OP_32_64(sub): 668 CASE_OP_32_64(shl): 669 CASE_OP_32_64(shr): 670 CASE_OP_32_64(sar): 671 CASE_OP_32_64(rotl): 672 CASE_OP_32_64(rotr): 673 CASE_OP_32_64(or): 674 CASE_OP_32_64(xor): 675 if (temps[args[1]].state == TCG_TEMP_CONST) { 676 /* Proceed with possible constant folding. */ 677 break; 678 } 679 if (temps[args[2]].state == TCG_TEMP_CONST 680 && temps[args[2]].val == 0) { 681 if (temps_are_copies(args[0], args[1])) { 682 s->gen_opc_buf[op_index] = INDEX_op_nop; 683 } else { 684 s->gen_opc_buf[op_index] = op_to_mov(op); 685 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 686 gen_args += 2; 687 } 688 args += 3; 689 continue; 690 } 691 break; 692 default: 693 break; 694 } 695 696 /* Simplify using known-zero bits */ 697 mask = -1; 698 affected = -1; 699 switch (op) { 700 CASE_OP_32_64(ext8s): 701 if ((temps[args[1]].mask & 0x80) != 0) { 702 break; 703 } 704 CASE_OP_32_64(ext8u): 705 mask = 0xff; 706 goto and_const; 707 CASE_OP_32_64(ext16s): 708 if ((temps[args[1]].mask & 0x8000) != 0) { 709 break; 710 } 711 CASE_OP_32_64(ext16u): 712 mask = 0xffff; 713 goto and_const; 714 case INDEX_op_ext32s_i64: 715 if ((temps[args[1]].mask & 0x80000000) != 0) { 716 break; 717 } 718 case INDEX_op_ext32u_i64: 719 mask = 0xffffffffU; 720 goto and_const; 721 722 CASE_OP_32_64(and): 723 mask = temps[args[2]].mask; 724 if (temps[args[2]].state == TCG_TEMP_CONST) { 725 and_const: 726 affected = temps[args[1]].mask & ~mask; 727 } 728 mask = temps[args[1]].mask & mask; 729 break; 730 731 CASE_OP_32_64(sar): 732 if (temps[args[2]].state == TCG_TEMP_CONST) { 733 mask = ((tcg_target_long)temps[args[1]].mask 734 >> temps[args[2]].val); 735 } 736 break; 737 738 CASE_OP_32_64(shr): 739 if (temps[args[2]].state == TCG_TEMP_CONST) { 740 mask = temps[args[1]].mask >> temps[args[2]].val; 741 } 742 break; 743 744 CASE_OP_32_64(shl): 745 if (temps[args[2]].state == TCG_TEMP_CONST) { 746 mask = temps[args[1]].mask << temps[args[2]].val; 747 } 748 break; 749 750 CASE_OP_32_64(neg): 751 /* Set to 1 all bits to the left of the rightmost. */ 752 mask = -(temps[args[1]].mask & -temps[args[1]].mask); 753 break; 754 755 CASE_OP_32_64(deposit): 756 tmp = ((1ull << args[4]) - 1); 757 mask = ((temps[args[1]].mask & ~(tmp << args[3])) 758 | ((temps[args[2]].mask & tmp) << args[3])); 759 break; 760 761 CASE_OP_32_64(or): 762 CASE_OP_32_64(xor): 763 mask = temps[args[1]].mask | temps[args[2]].mask; 764 break; 765 766 CASE_OP_32_64(setcond): 767 mask = 1; 768 break; 769 770 CASE_OP_32_64(movcond): 771 mask = temps[args[3]].mask | temps[args[4]].mask; 772 break; 773 774 default: 775 break; 776 } 777 778 if (mask == 0) { 779 assert(def->nb_oargs == 1); 780 s->gen_opc_buf[op_index] = op_to_movi(op); 781 tcg_opt_gen_movi(gen_args, args[0], 0); 782 args += def->nb_oargs + def->nb_iargs + def->nb_cargs; 783 gen_args += 2; 784 continue; 785 } 786 if (affected == 0) { 787 assert(def->nb_oargs == 1); 788 if (temps_are_copies(args[0], args[1])) { 789 s->gen_opc_buf[op_index] = INDEX_op_nop; 790 } else if (temps[args[1]].state != TCG_TEMP_CONST) { 791 s->gen_opc_buf[op_index] = op_to_mov(op); 792 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 793 gen_args += 2; 794 } else { 795 s->gen_opc_buf[op_index] = op_to_movi(op); 796 tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val); 797 gen_args += 2; 798 } 799 args += def->nb_iargs + 1; 800 continue; 801 } 802 803 /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ 804 switch (op) { 805 CASE_OP_32_64(and): 806 CASE_OP_32_64(mul): 807 CASE_OP_32_64(muluh): 808 CASE_OP_32_64(mulsh): 809 if ((temps[args[2]].state == TCG_TEMP_CONST 810 && temps[args[2]].val == 0)) { 811 s->gen_opc_buf[op_index] = op_to_movi(op); 812 tcg_opt_gen_movi(gen_args, args[0], 0); 813 args += 3; 814 gen_args += 2; 815 continue; 816 } 817 break; 818 default: 819 break; 820 } 821 822 /* Simplify expression for "op r, a, a => mov r, a" cases */ 823 switch (op) { 824 CASE_OP_32_64(or): 825 CASE_OP_32_64(and): 826 if (temps_are_copies(args[1], args[2])) { 827 if (temps_are_copies(args[0], args[1])) { 828 s->gen_opc_buf[op_index] = INDEX_op_nop; 829 } else { 830 s->gen_opc_buf[op_index] = op_to_mov(op); 831 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 832 gen_args += 2; 833 } 834 args += 3; 835 continue; 836 } 837 break; 838 default: 839 break; 840 } 841 842 /* Simplify expression for "op r, a, a => movi r, 0" cases */ 843 switch (op) { 844 CASE_OP_32_64(sub): 845 CASE_OP_32_64(xor): 846 if (temps_are_copies(args[1], args[2])) { 847 s->gen_opc_buf[op_index] = op_to_movi(op); 848 tcg_opt_gen_movi(gen_args, args[0], 0); 849 gen_args += 2; 850 args += 3; 851 continue; 852 } 853 break; 854 default: 855 break; 856 } 857 858 /* Propagate constants through copy operations and do constant 859 folding. Constants will be substituted to arguments by register 860 allocator where needed and possible. Also detect copies. */ 861 switch (op) { 862 CASE_OP_32_64(mov): 863 if (temps_are_copies(args[0], args[1])) { 864 args += 2; 865 s->gen_opc_buf[op_index] = INDEX_op_nop; 866 break; 867 } 868 if (temps[args[1]].state != TCG_TEMP_CONST) { 869 tcg_opt_gen_mov(s, gen_args, args[0], args[1]); 870 gen_args += 2; 871 args += 2; 872 break; 873 } 874 /* Source argument is constant. Rewrite the operation and 875 let movi case handle it. */ 876 op = op_to_movi(op); 877 s->gen_opc_buf[op_index] = op; 878 args[1] = temps[args[1]].val; 879 /* fallthrough */ 880 CASE_OP_32_64(movi): 881 tcg_opt_gen_movi(gen_args, args[0], args[1]); 882 gen_args += 2; 883 args += 2; 884 break; 885 886 CASE_OP_32_64(not): 887 CASE_OP_32_64(neg): 888 CASE_OP_32_64(ext8s): 889 CASE_OP_32_64(ext8u): 890 CASE_OP_32_64(ext16s): 891 CASE_OP_32_64(ext16u): 892 case INDEX_op_ext32s_i64: 893 case INDEX_op_ext32u_i64: 894 if (temps[args[1]].state == TCG_TEMP_CONST) { 895 s->gen_opc_buf[op_index] = op_to_movi(op); 896 tmp = do_constant_folding(op, temps[args[1]].val, 0); 897 tcg_opt_gen_movi(gen_args, args[0], tmp); 898 gen_args += 2; 899 args += 2; 900 break; 901 } 902 goto do_default; 903 904 CASE_OP_32_64(add): 905 CASE_OP_32_64(sub): 906 CASE_OP_32_64(mul): 907 CASE_OP_32_64(or): 908 CASE_OP_32_64(and): 909 CASE_OP_32_64(xor): 910 CASE_OP_32_64(shl): 911 CASE_OP_32_64(shr): 912 CASE_OP_32_64(sar): 913 CASE_OP_32_64(rotl): 914 CASE_OP_32_64(rotr): 915 CASE_OP_32_64(andc): 916 CASE_OP_32_64(orc): 917 CASE_OP_32_64(eqv): 918 CASE_OP_32_64(nand): 919 CASE_OP_32_64(nor): 920 CASE_OP_32_64(muluh): 921 CASE_OP_32_64(mulsh): 922 CASE_OP_32_64(div): 923 CASE_OP_32_64(divu): 924 CASE_OP_32_64(rem): 925 CASE_OP_32_64(remu): 926 if (temps[args[1]].state == TCG_TEMP_CONST 927 && temps[args[2]].state == TCG_TEMP_CONST) { 928 s->gen_opc_buf[op_index] = op_to_movi(op); 929 tmp = do_constant_folding(op, temps[args[1]].val, 930 temps[args[2]].val); 931 tcg_opt_gen_movi(gen_args, args[0], tmp); 932 gen_args += 2; 933 args += 3; 934 break; 935 } 936 goto do_default; 937 938 CASE_OP_32_64(deposit): 939 if (temps[args[1]].state == TCG_TEMP_CONST 940 && temps[args[2]].state == TCG_TEMP_CONST) { 941 s->gen_opc_buf[op_index] = op_to_movi(op); 942 tmp = ((1ull << args[4]) - 1); 943 tmp = (temps[args[1]].val & ~(tmp << args[3])) 944 | ((temps[args[2]].val & tmp) << args[3]); 945 tcg_opt_gen_movi(gen_args, args[0], tmp); 946 gen_args += 2; 947 args += 5; 948 break; 949 } 950 goto do_default; 951 952 CASE_OP_32_64(setcond): 953 tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); 954 if (tmp != 2) { 955 s->gen_opc_buf[op_index] = op_to_movi(op); 956 tcg_opt_gen_movi(gen_args, args[0], tmp); 957 gen_args += 2; 958 args += 4; 959 break; 960 } 961 goto do_default; 962 963 CASE_OP_32_64(brcond): 964 tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); 965 if (tmp != 2) { 966 if (tmp) { 967 reset_all_temps(nb_temps); 968 s->gen_opc_buf[op_index] = INDEX_op_br; 969 gen_args[0] = args[3]; 970 gen_args += 1; 971 } else { 972 s->gen_opc_buf[op_index] = INDEX_op_nop; 973 } 974 args += 4; 975 break; 976 } 977 goto do_default; 978 979 CASE_OP_32_64(movcond): 980 tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); 981 if (tmp != 2) { 982 if (temps_are_copies(args[0], args[4-tmp])) { 983 s->gen_opc_buf[op_index] = INDEX_op_nop; 984 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { 985 s->gen_opc_buf[op_index] = op_to_movi(op); 986 tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val); 987 gen_args += 2; 988 } else { 989 s->gen_opc_buf[op_index] = op_to_mov(op); 990 tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]); 991 gen_args += 2; 992 } 993 args += 6; 994 break; 995 } 996 goto do_default; 997 998 case INDEX_op_add2_i32: 999 case INDEX_op_sub2_i32: 1000 if (temps[args[2]].state == TCG_TEMP_CONST 1001 && temps[args[3]].state == TCG_TEMP_CONST 1002 && temps[args[4]].state == TCG_TEMP_CONST 1003 && temps[args[5]].state == TCG_TEMP_CONST) { 1004 uint32_t al = temps[args[2]].val; 1005 uint32_t ah = temps[args[3]].val; 1006 uint32_t bl = temps[args[4]].val; 1007 uint32_t bh = temps[args[5]].val; 1008 uint64_t a = ((uint64_t)ah << 32) | al; 1009 uint64_t b = ((uint64_t)bh << 32) | bl; 1010 TCGArg rl, rh; 1011 1012 if (op == INDEX_op_add2_i32) { 1013 a += b; 1014 } else { 1015 a -= b; 1016 } 1017 1018 /* We emit the extra nop when we emit the add2/sub2. */ 1019 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); 1020 1021 rl = args[0]; 1022 rh = args[1]; 1023 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 1024 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; 1025 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a); 1026 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32)); 1027 gen_args += 4; 1028 args += 6; 1029 break; 1030 } 1031 goto do_default; 1032 1033 case INDEX_op_mulu2_i32: 1034 if (temps[args[2]].state == TCG_TEMP_CONST 1035 && temps[args[3]].state == TCG_TEMP_CONST) { 1036 uint32_t a = temps[args[2]].val; 1037 uint32_t b = temps[args[3]].val; 1038 uint64_t r = (uint64_t)a * b; 1039 TCGArg rl, rh; 1040 1041 /* We emit the extra nop when we emit the mulu2. */ 1042 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); 1043 1044 rl = args[0]; 1045 rh = args[1]; 1046 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 1047 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32; 1048 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r); 1049 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32)); 1050 gen_args += 4; 1051 args += 4; 1052 break; 1053 } 1054 goto do_default; 1055 1056 case INDEX_op_brcond2_i32: 1057 tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); 1058 if (tmp != 2) { 1059 if (tmp) { 1060 reset_all_temps(nb_temps); 1061 s->gen_opc_buf[op_index] = INDEX_op_br; 1062 gen_args[0] = args[5]; 1063 gen_args += 1; 1064 } else { 1065 s->gen_opc_buf[op_index] = INDEX_op_nop; 1066 } 1067 } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) 1068 && temps[args[2]].state == TCG_TEMP_CONST 1069 && temps[args[3]].state == TCG_TEMP_CONST 1070 && temps[args[2]].val == 0 1071 && temps[args[3]].val == 0) { 1072 /* Simplify LT/GE comparisons vs zero to a single compare 1073 vs the high word of the input. */ 1074 reset_all_temps(nb_temps); 1075 s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; 1076 gen_args[0] = args[1]; 1077 gen_args[1] = args[3]; 1078 gen_args[2] = args[4]; 1079 gen_args[3] = args[5]; 1080 gen_args += 4; 1081 } else { 1082 goto do_default; 1083 } 1084 args += 6; 1085 break; 1086 1087 case INDEX_op_setcond2_i32: 1088 tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); 1089 if (tmp != 2) { 1090 s->gen_opc_buf[op_index] = INDEX_op_movi_i32; 1091 tcg_opt_gen_movi(gen_args, args[0], tmp); 1092 gen_args += 2; 1093 } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) 1094 && temps[args[3]].state == TCG_TEMP_CONST 1095 && temps[args[4]].state == TCG_TEMP_CONST 1096 && temps[args[3]].val == 0 1097 && temps[args[4]].val == 0) { 1098 /* Simplify LT/GE comparisons vs zero to a single compare 1099 vs the high word of the input. */ 1100 s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; 1101 reset_temp(args[0]); 1102 gen_args[0] = args[0]; 1103 gen_args[1] = args[2]; 1104 gen_args[2] = args[4]; 1105 gen_args[3] = args[5]; 1106 gen_args += 4; 1107 } else { 1108 goto do_default; 1109 } 1110 args += 6; 1111 break; 1112 1113 case INDEX_op_call: 1114 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); 1115 if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | 1116 TCG_CALL_NO_WRITE_GLOBALS))) { 1117 for (i = 0; i < nb_globals; i++) { 1118 reset_temp(i); 1119 } 1120 } 1121 for (i = 0; i < (args[0] >> 16); i++) { 1122 reset_temp(args[i + 1]); 1123 } 1124 i = nb_call_args + 3; 1125 while (i) { 1126 *gen_args = *args; 1127 args++; 1128 gen_args++; 1129 i--; 1130 } 1131 break; 1132 1133 default: 1134 do_default: 1135 /* Default case: we know nothing about operation (or were unable 1136 to compute the operation result) so no propagation is done. 1137 We trash everything if the operation is the end of a basic 1138 block, otherwise we only trash the output args. "mask" is 1139 the non-zero bits mask for the first output arg. */ 1140 if (def->flags & TCG_OPF_BB_END) { 1141 reset_all_temps(nb_temps); 1142 } else { 1143 for (i = 0; i < def->nb_oargs; i++) { 1144 reset_temp(args[i]); 1145 } 1146 } 1147 for (i = 0; i < def->nb_args; i++) { 1148 gen_args[i] = args[i]; 1149 } 1150 args += def->nb_args; 1151 gen_args += def->nb_args; 1152 break; 1153 } 1154 } 1155 1156 return gen_args; 1157 } 1158 1159 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, 1160 TCGArg *args, TCGOpDef *tcg_op_defs) 1161 { 1162 TCGArg *res; 1163 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); 1164 return res; 1165 } 1166