Home | History | Annotate | Download | only in tcg
      1 /*
      2  * Optimizations for Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2010 Samsung Electronics.
      5  * Contributed by Kirill Batuzov <batuzovk (at) ispras.ru>
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "config.h"
     27 
     28 #include <stdlib.h>
     29 #include <stdio.h>
     30 
     31 #include "qemu-common.h"
     32 #include "cpu.h"
     33 #include "exec/exec-all.h"
     34 #include "tcg-op.h"
     35 
     36 #define CASE_OP_32_64(x)                        \
     37         glue(glue(case INDEX_op_, x), _i32):    \
     38         glue(glue(case INDEX_op_, x), _i64)
     39 
     40 typedef enum {
     41     TCG_TEMP_UNDEF = 0,
     42     TCG_TEMP_CONST,
     43     TCG_TEMP_COPY,
     44 } tcg_temp_state;
     45 
     46 struct tcg_temp_info {
     47     tcg_temp_state state;
     48     uint16_t prev_copy;
     49     uint16_t next_copy;
     50     tcg_target_ulong val;
     51     tcg_target_ulong mask;
     52 };
     53 
     54 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
     55 
     56 /* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
     57    the copy flag from the left temp.  */
     58 static void reset_temp(TCGArg temp)
     59 {
     60     if (temps[temp].state == TCG_TEMP_COPY) {
     61         if (temps[temp].prev_copy == temps[temp].next_copy) {
     62             temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
     63         } else {
     64             temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
     65             temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
     66         }
     67     }
     68     temps[temp].state = TCG_TEMP_UNDEF;
     69     temps[temp].mask = -1;
     70 }
     71 
     72 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
     73 static void reset_all_temps(int nb_temps)
     74 {
     75     int i;
     76     for (i = 0; i < nb_temps; i++) {
     77         temps[i].state = TCG_TEMP_UNDEF;
     78         temps[i].mask = -1;
     79     }
     80 }
     81 
     82 static int op_bits(TCGOpcode op)
     83 {
     84     const TCGOpDef *def = &tcg_op_defs[op];
     85     return def->flags & TCG_OPF_64BIT ? 64 : 32;
     86 }
     87 
     88 static TCGOpcode op_to_movi(TCGOpcode op)
     89 {
     90     switch (op_bits(op)) {
     91     case 32:
     92         return INDEX_op_movi_i32;
     93     case 64:
     94         return INDEX_op_movi_i64;
     95     default:
     96         fprintf(stderr, "op_to_movi: unexpected return value of "
     97                 "function op_bits.\n");
     98         tcg_abort();
     99     }
    100 }
    101 
    102 static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
    103 {
    104     TCGArg i;
    105 
    106     /* If this is already a global, we can't do better. */
    107     if (temp < s->nb_globals) {
    108         return temp;
    109     }
    110 
    111     /* Search for a global first. */
    112     for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
    113         if (i < s->nb_globals) {
    114             return i;
    115         }
    116     }
    117 
    118     /* If it is a temp, search for a temp local. */
    119     if (!s->temps[temp].temp_local) {
    120         for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
    121             if (s->temps[i].temp_local) {
    122                 return i;
    123             }
    124         }
    125     }
    126 
    127     /* Failure to find a better representation, return the same temp. */
    128     return temp;
    129 }
    130 
    131 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
    132 {
    133     TCGArg i;
    134 
    135     if (arg1 == arg2) {
    136         return true;
    137     }
    138 
    139     if (temps[arg1].state != TCG_TEMP_COPY
    140         || temps[arg2].state != TCG_TEMP_COPY) {
    141         return false;
    142     }
    143 
    144     for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
    145         if (i == arg2) {
    146             return true;
    147         }
    148     }
    149 
    150     return false;
    151 }
    152 
    153 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
    154                             TCGArg dst, TCGArg src)
    155 {
    156     reset_temp(dst);
    157     temps[dst].mask = temps[src].mask;
    158     assert(temps[src].state != TCG_TEMP_CONST);
    159 
    160     if (s->temps[src].type == s->temps[dst].type) {
    161         if (temps[src].state != TCG_TEMP_COPY) {
    162             temps[src].state = TCG_TEMP_COPY;
    163             temps[src].next_copy = src;
    164             temps[src].prev_copy = src;
    165         }
    166         temps[dst].state = TCG_TEMP_COPY;
    167         temps[dst].next_copy = temps[src].next_copy;
    168         temps[dst].prev_copy = src;
    169         temps[temps[dst].next_copy].prev_copy = dst;
    170         temps[src].next_copy = dst;
    171     }
    172 
    173     gen_args[0] = dst;
    174     gen_args[1] = src;
    175 }
    176 
    177 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
    178 {
    179     reset_temp(dst);
    180     temps[dst].state = TCG_TEMP_CONST;
    181     temps[dst].val = val;
    182     temps[dst].mask = val;
    183     gen_args[0] = dst;
    184     gen_args[1] = val;
    185 }
    186 
    187 static TCGOpcode op_to_mov(TCGOpcode op)
    188 {
    189     switch (op_bits(op)) {
    190     case 32:
    191         return INDEX_op_mov_i32;
    192     case 64:
    193         return INDEX_op_mov_i64;
    194     default:
    195         fprintf(stderr, "op_to_mov: unexpected return value of "
    196                 "function op_bits.\n");
    197         tcg_abort();
    198     }
    199 }
    200 
    201 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
    202 {
    203     uint64_t l64, h64;
    204 
    205     switch (op) {
    206     CASE_OP_32_64(add):
    207         return x + y;
    208 
    209     CASE_OP_32_64(sub):
    210         return x - y;
    211 
    212     CASE_OP_32_64(mul):
    213         return x * y;
    214 
    215     CASE_OP_32_64(and):
    216         return x & y;
    217 
    218     CASE_OP_32_64(or):
    219         return x | y;
    220 
    221     CASE_OP_32_64(xor):
    222         return x ^ y;
    223 
    224     case INDEX_op_shl_i32:
    225         return (uint32_t)x << (uint32_t)y;
    226 
    227     case INDEX_op_shl_i64:
    228         return (uint64_t)x << (uint64_t)y;
    229 
    230     case INDEX_op_shr_i32:
    231         return (uint32_t)x >> (uint32_t)y;
    232 
    233     case INDEX_op_shr_i64:
    234         return (uint64_t)x >> (uint64_t)y;
    235 
    236     case INDEX_op_sar_i32:
    237         return (int32_t)x >> (int32_t)y;
    238 
    239     case INDEX_op_sar_i64:
    240         return (int64_t)x >> (int64_t)y;
    241 
    242     case INDEX_op_rotr_i32:
    243         return ror32(x, y);
    244 
    245     case INDEX_op_rotr_i64:
    246         return ror64(x, y);
    247 
    248     case INDEX_op_rotl_i32:
    249         return rol32(x, y);
    250 
    251     case INDEX_op_rotl_i64:
    252         return rol64(x, y);
    253 
    254     CASE_OP_32_64(not):
    255         return ~x;
    256 
    257     CASE_OP_32_64(neg):
    258         return -x;
    259 
    260     CASE_OP_32_64(andc):
    261         return x & ~y;
    262 
    263     CASE_OP_32_64(orc):
    264         return x | ~y;
    265 
    266     CASE_OP_32_64(eqv):
    267         return ~(x ^ y);
    268 
    269     CASE_OP_32_64(nand):
    270         return ~(x & y);
    271 
    272     CASE_OP_32_64(nor):
    273         return ~(x | y);
    274 
    275     CASE_OP_32_64(ext8s):
    276         return (int8_t)x;
    277 
    278     CASE_OP_32_64(ext16s):
    279         return (int16_t)x;
    280 
    281     CASE_OP_32_64(ext8u):
    282         return (uint8_t)x;
    283 
    284     CASE_OP_32_64(ext16u):
    285         return (uint16_t)x;
    286 
    287     case INDEX_op_ext32s_i64:
    288         return (int32_t)x;
    289 
    290     case INDEX_op_ext32u_i64:
    291         return (uint32_t)x;
    292 
    293     case INDEX_op_muluh_i32:
    294         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    295     case INDEX_op_mulsh_i32:
    296         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
    297 
    298     case INDEX_op_muluh_i64:
    299         mulu64(&l64, &h64, x, y);
    300         return h64;
    301     case INDEX_op_mulsh_i64:
    302         muls64(&l64, &h64, x, y);
    303         return h64;
    304 
    305     case INDEX_op_div_i32:
    306         /* Avoid crashing on divide by zero, otherwise undefined.  */
    307         return (int32_t)x / ((int32_t)y ? : 1);
    308     case INDEX_op_divu_i32:
    309         return (uint32_t)x / ((uint32_t)y ? : 1);
    310     case INDEX_op_div_i64:
    311         return (int64_t)x / ((int64_t)y ? : 1);
    312     case INDEX_op_divu_i64:
    313         return (uint64_t)x / ((uint64_t)y ? : 1);
    314 
    315     case INDEX_op_rem_i32:
    316         return (int32_t)x % ((int32_t)y ? : 1);
    317     case INDEX_op_remu_i32:
    318         return (uint32_t)x % ((uint32_t)y ? : 1);
    319     case INDEX_op_rem_i64:
    320         return (int64_t)x % ((int64_t)y ? : 1);
    321     case INDEX_op_remu_i64:
    322         return (uint64_t)x % ((uint64_t)y ? : 1);
    323 
    324     default:
    325         fprintf(stderr,
    326                 "Unrecognized operation %d in do_constant_folding.\n", op);
    327         tcg_abort();
    328     }
    329 }
    330 
    331 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
    332 {
    333     TCGArg res = do_constant_folding_2(op, x, y);
    334     if (op_bits(op) == 32) {
    335         res &= 0xffffffff;
    336     }
    337     return res;
    338 }
    339 
    340 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
    341 {
    342     switch (c) {
    343     case TCG_COND_EQ:
    344         return x == y;
    345     case TCG_COND_NE:
    346         return x != y;
    347     case TCG_COND_LT:
    348         return (int32_t)x < (int32_t)y;
    349     case TCG_COND_GE:
    350         return (int32_t)x >= (int32_t)y;
    351     case TCG_COND_LE:
    352         return (int32_t)x <= (int32_t)y;
    353     case TCG_COND_GT:
    354         return (int32_t)x > (int32_t)y;
    355     case TCG_COND_LTU:
    356         return x < y;
    357     case TCG_COND_GEU:
    358         return x >= y;
    359     case TCG_COND_LEU:
    360         return x <= y;
    361     case TCG_COND_GTU:
    362         return x > y;
    363     default:
    364         tcg_abort();
    365     }
    366 }
    367 
    368 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
    369 {
    370     switch (c) {
    371     case TCG_COND_EQ:
    372         return x == y;
    373     case TCG_COND_NE:
    374         return x != y;
    375     case TCG_COND_LT:
    376         return (int64_t)x < (int64_t)y;
    377     case TCG_COND_GE:
    378         return (int64_t)x >= (int64_t)y;
    379     case TCG_COND_LE:
    380         return (int64_t)x <= (int64_t)y;
    381     case TCG_COND_GT:
    382         return (int64_t)x > (int64_t)y;
    383     case TCG_COND_LTU:
    384         return x < y;
    385     case TCG_COND_GEU:
    386         return x >= y;
    387     case TCG_COND_LEU:
    388         return x <= y;
    389     case TCG_COND_GTU:
    390         return x > y;
    391     default:
    392         tcg_abort();
    393     }
    394 }
    395 
    396 static bool do_constant_folding_cond_eq(TCGCond c)
    397 {
    398     switch (c) {
    399     case TCG_COND_GT:
    400     case TCG_COND_LTU:
    401     case TCG_COND_LT:
    402     case TCG_COND_GTU:
    403     case TCG_COND_NE:
    404         return 0;
    405     case TCG_COND_GE:
    406     case TCG_COND_GEU:
    407     case TCG_COND_LE:
    408     case TCG_COND_LEU:
    409     case TCG_COND_EQ:
    410         return 1;
    411     default:
    412         tcg_abort();
    413     }
    414 }
    415 
    416 /* Return 2 if the condition can't be simplified, and the result
    417    of the condition (0 or 1) if it can */
    418 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
    419                                        TCGArg y, TCGCond c)
    420 {
    421     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
    422         switch (op_bits(op)) {
    423         case 32:
    424             return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
    425         case 64:
    426             return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
    427         default:
    428             tcg_abort();
    429         }
    430     } else if (temps_are_copies(x, y)) {
    431         return do_constant_folding_cond_eq(c);
    432     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
    433         switch (c) {
    434         case TCG_COND_LTU:
    435             return 0;
    436         case TCG_COND_GEU:
    437             return 1;
    438         default:
    439             return 2;
    440         }
    441     } else {
    442         return 2;
    443     }
    444 }
    445 
    446 /* Return 2 if the condition can't be simplified, and the result
    447    of the condition (0 or 1) if it can */
    448 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
    449 {
    450     TCGArg al = p1[0], ah = p1[1];
    451     TCGArg bl = p2[0], bh = p2[1];
    452 
    453     if (temps[bl].state == TCG_TEMP_CONST
    454         && temps[bh].state == TCG_TEMP_CONST) {
    455         uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
    456 
    457         if (temps[al].state == TCG_TEMP_CONST
    458             && temps[ah].state == TCG_TEMP_CONST) {
    459             uint64_t a;
    460             a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
    461             return do_constant_folding_cond_64(a, b, c);
    462         }
    463         if (b == 0) {
    464             switch (c) {
    465             case TCG_COND_LTU:
    466                 return 0;
    467             case TCG_COND_GEU:
    468                 return 1;
    469             default:
    470                 break;
    471             }
    472         }
    473     }
    474     if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
    475         return do_constant_folding_cond_eq(c);
    476     }
    477     return 2;
    478 }
    479 
    480 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
    481 {
    482     TCGArg a1 = *p1, a2 = *p2;
    483     int sum = 0;
    484     sum += temps[a1].state == TCG_TEMP_CONST;
    485     sum -= temps[a2].state == TCG_TEMP_CONST;
    486 
    487     /* Prefer the constant in second argument, and then the form
    488        op a, a, b, which is better handled on non-RISC hosts. */
    489     if (sum > 0 || (sum == 0 && dest == a2)) {
    490         *p1 = a2;
    491         *p2 = a1;
    492         return true;
    493     }
    494     return false;
    495 }
    496 
    497 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
    498 {
    499     int sum = 0;
    500     sum += temps[p1[0]].state == TCG_TEMP_CONST;
    501     sum += temps[p1[1]].state == TCG_TEMP_CONST;
    502     sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    503     sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    504     if (sum > 0) {
    505         TCGArg t;
    506         t = p1[0], p1[0] = p2[0], p2[0] = t;
    507         t = p1[1], p1[1] = p2[1], p2[1] = t;
    508         return true;
    509     }
    510     return false;
    511 }
    512 
    513 /* Propagate constants and copies, fold constant expressions. */
    514 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
    515                                     TCGArg *args, TCGOpDef *tcg_op_defs)
    516 {
    517     int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    518     tcg_target_ulong mask, affected;
    519     TCGOpcode op;
    520     const TCGOpDef *def;
    521     TCGArg *gen_args;
    522     TCGArg tmp;
    523 
    524     /* Array VALS has an element for each temp.
    525        If this temp holds a constant then its value is kept in VALS' element.
    526        If this temp is a copy of other ones then the other copies are
    527        available through the doubly linked circular list. */
    528 
    529     nb_temps = s->nb_temps;
    530     nb_globals = s->nb_globals;
    531     reset_all_temps(nb_temps);
    532 
    533     nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    534     gen_args = args;
    535     for (op_index = 0; op_index < nb_ops; op_index++) {
    536         op = s->gen_opc_buf[op_index];
    537         def = &tcg_op_defs[op];
    538         /* Do copy propagation */
    539         if (op == INDEX_op_call) {
    540             int nb_oargs = args[0] >> 16;
    541             int nb_iargs = args[0] & 0xffff;
    542             for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
    543                 if (temps[args[i]].state == TCG_TEMP_COPY) {
    544                     args[i] = find_better_copy(s, args[i]);
    545                 }
    546             }
    547         } else {
    548             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
    549                 if (temps[args[i]].state == TCG_TEMP_COPY) {
    550                     args[i] = find_better_copy(s, args[i]);
    551                 }
    552             }
    553         }
    554 
    555         /* For commutative operations make constant second argument */
    556         switch (op) {
    557         CASE_OP_32_64(add):
    558         CASE_OP_32_64(mul):
    559         CASE_OP_32_64(and):
    560         CASE_OP_32_64(or):
    561         CASE_OP_32_64(xor):
    562         CASE_OP_32_64(eqv):
    563         CASE_OP_32_64(nand):
    564         CASE_OP_32_64(nor):
    565         CASE_OP_32_64(muluh):
    566         CASE_OP_32_64(mulsh):
    567             swap_commutative(args[0], &args[1], &args[2]);
    568             break;
    569         CASE_OP_32_64(brcond):
    570             if (swap_commutative(-1, &args[0], &args[1])) {
    571                 args[2] = tcg_swap_cond(args[2]);
    572             }
    573             break;
    574         CASE_OP_32_64(setcond):
    575             if (swap_commutative(args[0], &args[1], &args[2])) {
    576                 args[3] = tcg_swap_cond(args[3]);
    577             }
    578             break;
    579         CASE_OP_32_64(movcond):
    580             if (swap_commutative(-1, &args[1], &args[2])) {
    581                 args[5] = tcg_swap_cond(args[5]);
    582             }
    583             /* For movcond, we canonicalize the "false" input reg to match
    584                the destination reg so that the tcg backend can implement
    585                a "move if true" operation.  */
    586             if (swap_commutative(args[0], &args[4], &args[3])) {
    587                 args[5] = tcg_invert_cond(args[5]);
    588             }
    589             break;
    590         CASE_OP_32_64(add2):
    591             swap_commutative(args[0], &args[2], &args[4]);
    592             swap_commutative(args[1], &args[3], &args[5]);
    593             break;
    594         CASE_OP_32_64(mulu2):
    595         CASE_OP_32_64(muls2):
    596             swap_commutative(args[0], &args[2], &args[3]);
    597             break;
    598         case INDEX_op_brcond2_i32:
    599             if (swap_commutative2(&args[0], &args[2])) {
    600                 args[4] = tcg_swap_cond(args[4]);
    601             }
    602             break;
    603         case INDEX_op_setcond2_i32:
    604             if (swap_commutative2(&args[1], &args[3])) {
    605                 args[5] = tcg_swap_cond(args[5]);
    606             }
    607             break;
    608         default:
    609             break;
    610         }
    611 
    612         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
    613            and "sub r, 0, a => neg r, a" case.  */
    614         switch (op) {
    615         CASE_OP_32_64(shl):
    616         CASE_OP_32_64(shr):
    617         CASE_OP_32_64(sar):
    618         CASE_OP_32_64(rotl):
    619         CASE_OP_32_64(rotr):
    620             if (temps[args[1]].state == TCG_TEMP_CONST
    621                 && temps[args[1]].val == 0) {
    622                 s->gen_opc_buf[op_index] = op_to_movi(op);
    623                 tcg_opt_gen_movi(gen_args, args[0], 0);
    624                 args += 3;
    625                 gen_args += 2;
    626                 continue;
    627             }
    628             break;
    629         CASE_OP_32_64(sub):
    630             {
    631                 TCGOpcode neg_op;
    632                 bool have_neg;
    633 
    634                 if (temps[args[2]].state == TCG_TEMP_CONST) {
    635                     /* Proceed with possible constant folding. */
    636                     break;
    637                 }
    638                 if (op == INDEX_op_sub_i32) {
    639                     neg_op = INDEX_op_neg_i32;
    640                     have_neg = TCG_TARGET_HAS_neg_i32;
    641                 } else {
    642                     neg_op = INDEX_op_neg_i64;
    643                     have_neg = TCG_TARGET_HAS_neg_i64;
    644                 }
    645                 if (!have_neg) {
    646                     break;
    647                 }
    648                 if (temps[args[1]].state == TCG_TEMP_CONST
    649                     && temps[args[1]].val == 0) {
    650                     s->gen_opc_buf[op_index] = neg_op;
    651                     reset_temp(args[0]);
    652                     gen_args[0] = args[0];
    653                     gen_args[1] = args[2];
    654                     args += 3;
    655                     gen_args += 2;
    656                     continue;
    657                 }
    658             }
    659             break;
    660         default:
    661             break;
    662         }
    663 
    664         /* Simplify expression for "op r, a, 0 => mov r, a" cases */
    665         switch (op) {
    666         CASE_OP_32_64(add):
    667         CASE_OP_32_64(sub):
    668         CASE_OP_32_64(shl):
    669         CASE_OP_32_64(shr):
    670         CASE_OP_32_64(sar):
    671         CASE_OP_32_64(rotl):
    672         CASE_OP_32_64(rotr):
    673         CASE_OP_32_64(or):
    674         CASE_OP_32_64(xor):
    675             if (temps[args[1]].state == TCG_TEMP_CONST) {
    676                 /* Proceed with possible constant folding. */
    677                 break;
    678             }
    679             if (temps[args[2]].state == TCG_TEMP_CONST
    680                 && temps[args[2]].val == 0) {
    681                 if (temps_are_copies(args[0], args[1])) {
    682                     s->gen_opc_buf[op_index] = INDEX_op_nop;
    683                 } else {
    684                     s->gen_opc_buf[op_index] = op_to_mov(op);
    685                     tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
    686                     gen_args += 2;
    687                 }
    688                 args += 3;
    689                 continue;
    690             }
    691             break;
    692         default:
    693             break;
    694         }
    695 
    696         /* Simplify using known-zero bits */
    697         mask = -1;
    698         affected = -1;
    699         switch (op) {
    700         CASE_OP_32_64(ext8s):
    701             if ((temps[args[1]].mask & 0x80) != 0) {
    702                 break;
    703             }
    704         CASE_OP_32_64(ext8u):
    705             mask = 0xff;
    706             goto and_const;
    707         CASE_OP_32_64(ext16s):
    708             if ((temps[args[1]].mask & 0x8000) != 0) {
    709                 break;
    710             }
    711         CASE_OP_32_64(ext16u):
    712             mask = 0xffff;
    713             goto and_const;
    714         case INDEX_op_ext32s_i64:
    715             if ((temps[args[1]].mask & 0x80000000) != 0) {
    716                 break;
    717             }
    718         case INDEX_op_ext32u_i64:
    719             mask = 0xffffffffU;
    720             goto and_const;
    721 
    722         CASE_OP_32_64(and):
    723             mask = temps[args[2]].mask;
    724             if (temps[args[2]].state == TCG_TEMP_CONST) {
    725         and_const:
    726                 affected = temps[args[1]].mask & ~mask;
    727             }
    728             mask = temps[args[1]].mask & mask;
    729             break;
    730 
    731         CASE_OP_32_64(sar):
    732             if (temps[args[2]].state == TCG_TEMP_CONST) {
    733                 mask = ((tcg_target_long)temps[args[1]].mask
    734                         >> temps[args[2]].val);
    735             }
    736             break;
    737 
    738         CASE_OP_32_64(shr):
    739             if (temps[args[2]].state == TCG_TEMP_CONST) {
    740                 mask = temps[args[1]].mask >> temps[args[2]].val;
    741             }
    742             break;
    743 
    744         CASE_OP_32_64(shl):
    745             if (temps[args[2]].state == TCG_TEMP_CONST) {
    746                 mask = temps[args[1]].mask << temps[args[2]].val;
    747             }
    748             break;
    749 
    750         CASE_OP_32_64(neg):
    751             /* Set to 1 all bits to the left of the rightmost.  */
    752             mask = -(temps[args[1]].mask & -temps[args[1]].mask);
    753             break;
    754 
    755         CASE_OP_32_64(deposit):
    756             tmp = ((1ull << args[4]) - 1);
    757             mask = ((temps[args[1]].mask & ~(tmp << args[3]))
    758                     | ((temps[args[2]].mask & tmp) << args[3]));
    759             break;
    760 
    761         CASE_OP_32_64(or):
    762         CASE_OP_32_64(xor):
    763             mask = temps[args[1]].mask | temps[args[2]].mask;
    764             break;
    765 
    766         CASE_OP_32_64(setcond):
    767             mask = 1;
    768             break;
    769 
    770         CASE_OP_32_64(movcond):
    771             mask = temps[args[3]].mask | temps[args[4]].mask;
    772             break;
    773 
    774         default:
    775             break;
    776         }
    777 
    778         if (mask == 0) {
    779             assert(def->nb_oargs == 1);
    780             s->gen_opc_buf[op_index] = op_to_movi(op);
    781             tcg_opt_gen_movi(gen_args, args[0], 0);
    782             args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
    783             gen_args += 2;
    784             continue;
    785         }
    786         if (affected == 0) {
    787             assert(def->nb_oargs == 1);
    788             if (temps_are_copies(args[0], args[1])) {
    789                 s->gen_opc_buf[op_index] = INDEX_op_nop;
    790             } else if (temps[args[1]].state != TCG_TEMP_CONST) {
    791                 s->gen_opc_buf[op_index] = op_to_mov(op);
    792                 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
    793                 gen_args += 2;
    794             } else {
    795                 s->gen_opc_buf[op_index] = op_to_movi(op);
    796                 tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
    797                 gen_args += 2;
    798             }
    799             args += def->nb_iargs + 1;
    800             continue;
    801         }
    802 
    803         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
    804         switch (op) {
    805         CASE_OP_32_64(and):
    806         CASE_OP_32_64(mul):
    807         CASE_OP_32_64(muluh):
    808         CASE_OP_32_64(mulsh):
    809             if ((temps[args[2]].state == TCG_TEMP_CONST
    810                 && temps[args[2]].val == 0)) {
    811                 s->gen_opc_buf[op_index] = op_to_movi(op);
    812                 tcg_opt_gen_movi(gen_args, args[0], 0);
    813                 args += 3;
    814                 gen_args += 2;
    815                 continue;
    816             }
    817             break;
    818         default:
    819             break;
    820         }
    821 
    822         /* Simplify expression for "op r, a, a => mov r, a" cases */
    823         switch (op) {
    824         CASE_OP_32_64(or):
    825         CASE_OP_32_64(and):
    826             if (temps_are_copies(args[1], args[2])) {
    827                 if (temps_are_copies(args[0], args[1])) {
    828                     s->gen_opc_buf[op_index] = INDEX_op_nop;
    829                 } else {
    830                     s->gen_opc_buf[op_index] = op_to_mov(op);
    831                     tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
    832                     gen_args += 2;
    833                 }
    834                 args += 3;
    835                 continue;
    836             }
    837             break;
    838         default:
    839             break;
    840         }
    841 
    842         /* Simplify expression for "op r, a, a => movi r, 0" cases */
    843         switch (op) {
    844         CASE_OP_32_64(sub):
    845         CASE_OP_32_64(xor):
    846             if (temps_are_copies(args[1], args[2])) {
    847                 s->gen_opc_buf[op_index] = op_to_movi(op);
    848                 tcg_opt_gen_movi(gen_args, args[0], 0);
    849                 gen_args += 2;
    850                 args += 3;
    851                 continue;
    852             }
    853             break;
    854         default:
    855             break;
    856         }
    857 
    858         /* Propagate constants through copy operations and do constant
    859            folding.  Constants will be substituted to arguments by register
    860            allocator where needed and possible.  Also detect copies. */
    861         switch (op) {
    862         CASE_OP_32_64(mov):
    863             if (temps_are_copies(args[0], args[1])) {
    864                 args += 2;
    865                 s->gen_opc_buf[op_index] = INDEX_op_nop;
    866                 break;
    867             }
    868             if (temps[args[1]].state != TCG_TEMP_CONST) {
    869                 tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
    870                 gen_args += 2;
    871                 args += 2;
    872                 break;
    873             }
    874             /* Source argument is constant.  Rewrite the operation and
    875                let movi case handle it. */
    876             op = op_to_movi(op);
    877             s->gen_opc_buf[op_index] = op;
    878             args[1] = temps[args[1]].val;
    879             /* fallthrough */
    880         CASE_OP_32_64(movi):
    881             tcg_opt_gen_movi(gen_args, args[0], args[1]);
    882             gen_args += 2;
    883             args += 2;
    884             break;
    885 
    886         CASE_OP_32_64(not):
    887         CASE_OP_32_64(neg):
    888         CASE_OP_32_64(ext8s):
    889         CASE_OP_32_64(ext8u):
    890         CASE_OP_32_64(ext16s):
    891         CASE_OP_32_64(ext16u):
    892         case INDEX_op_ext32s_i64:
    893         case INDEX_op_ext32u_i64:
    894             if (temps[args[1]].state == TCG_TEMP_CONST) {
    895                 s->gen_opc_buf[op_index] = op_to_movi(op);
    896                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
    897                 tcg_opt_gen_movi(gen_args, args[0], tmp);
    898                 gen_args += 2;
    899                 args += 2;
    900                 break;
    901             }
    902             goto do_default;
    903 
    904         CASE_OP_32_64(add):
    905         CASE_OP_32_64(sub):
    906         CASE_OP_32_64(mul):
    907         CASE_OP_32_64(or):
    908         CASE_OP_32_64(and):
    909         CASE_OP_32_64(xor):
    910         CASE_OP_32_64(shl):
    911         CASE_OP_32_64(shr):
    912         CASE_OP_32_64(sar):
    913         CASE_OP_32_64(rotl):
    914         CASE_OP_32_64(rotr):
    915         CASE_OP_32_64(andc):
    916         CASE_OP_32_64(orc):
    917         CASE_OP_32_64(eqv):
    918         CASE_OP_32_64(nand):
    919         CASE_OP_32_64(nor):
    920         CASE_OP_32_64(muluh):
    921         CASE_OP_32_64(mulsh):
    922         CASE_OP_32_64(div):
    923         CASE_OP_32_64(divu):
    924         CASE_OP_32_64(rem):
    925         CASE_OP_32_64(remu):
    926             if (temps[args[1]].state == TCG_TEMP_CONST
    927                 && temps[args[2]].state == TCG_TEMP_CONST) {
    928                 s->gen_opc_buf[op_index] = op_to_movi(op);
    929                 tmp = do_constant_folding(op, temps[args[1]].val,
    930                                           temps[args[2]].val);
    931                 tcg_opt_gen_movi(gen_args, args[0], tmp);
    932                 gen_args += 2;
    933                 args += 3;
    934                 break;
    935             }
    936             goto do_default;
    937 
    938         CASE_OP_32_64(deposit):
    939             if (temps[args[1]].state == TCG_TEMP_CONST
    940                 && temps[args[2]].state == TCG_TEMP_CONST) {
    941                 s->gen_opc_buf[op_index] = op_to_movi(op);
    942                 tmp = ((1ull << args[4]) - 1);
    943                 tmp = (temps[args[1]].val & ~(tmp << args[3]))
    944                       | ((temps[args[2]].val & tmp) << args[3]);
    945                 tcg_opt_gen_movi(gen_args, args[0], tmp);
    946                 gen_args += 2;
    947                 args += 5;
    948                 break;
    949             }
    950             goto do_default;
    951 
    952         CASE_OP_32_64(setcond):
    953             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
    954             if (tmp != 2) {
    955                 s->gen_opc_buf[op_index] = op_to_movi(op);
    956                 tcg_opt_gen_movi(gen_args, args[0], tmp);
    957                 gen_args += 2;
    958                 args += 4;
    959                 break;
    960             }
    961             goto do_default;
    962 
    963         CASE_OP_32_64(brcond):
    964             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
    965             if (tmp != 2) {
    966                 if (tmp) {
    967                     reset_all_temps(nb_temps);
    968                     s->gen_opc_buf[op_index] = INDEX_op_br;
    969                     gen_args[0] = args[3];
    970                     gen_args += 1;
    971                 } else {
    972                     s->gen_opc_buf[op_index] = INDEX_op_nop;
    973                 }
    974                 args += 4;
    975                 break;
    976             }
    977             goto do_default;
    978 
    979         CASE_OP_32_64(movcond):
    980             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
    981             if (tmp != 2) {
    982                 if (temps_are_copies(args[0], args[4-tmp])) {
    983                     s->gen_opc_buf[op_index] = INDEX_op_nop;
    984                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
    985                     s->gen_opc_buf[op_index] = op_to_movi(op);
    986                     tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
    987                     gen_args += 2;
    988                 } else {
    989                     s->gen_opc_buf[op_index] = op_to_mov(op);
    990                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
    991                     gen_args += 2;
    992                 }
    993                 args += 6;
    994                 break;
    995             }
    996             goto do_default;
    997 
    998         case INDEX_op_add2_i32:
    999         case INDEX_op_sub2_i32:
   1000             if (temps[args[2]].state == TCG_TEMP_CONST
   1001                 && temps[args[3]].state == TCG_TEMP_CONST
   1002                 && temps[args[4]].state == TCG_TEMP_CONST
   1003                 && temps[args[5]].state == TCG_TEMP_CONST) {
   1004                 uint32_t al = temps[args[2]].val;
   1005                 uint32_t ah = temps[args[3]].val;
   1006                 uint32_t bl = temps[args[4]].val;
   1007                 uint32_t bh = temps[args[5]].val;
   1008                 uint64_t a = ((uint64_t)ah << 32) | al;
   1009                 uint64_t b = ((uint64_t)bh << 32) | bl;
   1010                 TCGArg rl, rh;
   1011 
   1012                 if (op == INDEX_op_add2_i32) {
   1013                     a += b;
   1014                 } else {
   1015                     a -= b;
   1016                 }
   1017 
   1018                 /* We emit the extra nop when we emit the add2/sub2.  */
   1019                 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
   1020 
   1021                 rl = args[0];
   1022                 rh = args[1];
   1023                 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
   1024                 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
   1025                 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
   1026                 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
   1027                 gen_args += 4;
   1028                 args += 6;
   1029                 break;
   1030             }
   1031             goto do_default;
   1032 
   1033         case INDEX_op_mulu2_i32:
   1034             if (temps[args[2]].state == TCG_TEMP_CONST
   1035                 && temps[args[3]].state == TCG_TEMP_CONST) {
   1036                 uint32_t a = temps[args[2]].val;
   1037                 uint32_t b = temps[args[3]].val;
   1038                 uint64_t r = (uint64_t)a * b;
   1039                 TCGArg rl, rh;
   1040 
   1041                 /* We emit the extra nop when we emit the mulu2.  */
   1042                 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
   1043 
   1044                 rl = args[0];
   1045                 rh = args[1];
   1046                 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
   1047                 s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
   1048                 tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
   1049                 tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
   1050                 gen_args += 4;
   1051                 args += 4;
   1052                 break;
   1053             }
   1054             goto do_default;
   1055 
   1056         case INDEX_op_brcond2_i32:
   1057             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
   1058             if (tmp != 2) {
   1059                 if (tmp) {
   1060                     reset_all_temps(nb_temps);
   1061                     s->gen_opc_buf[op_index] = INDEX_op_br;
   1062                     gen_args[0] = args[5];
   1063                     gen_args += 1;
   1064                 } else {
   1065                     s->gen_opc_buf[op_index] = INDEX_op_nop;
   1066                 }
   1067             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
   1068                        && temps[args[2]].state == TCG_TEMP_CONST
   1069                        && temps[args[3]].state == TCG_TEMP_CONST
   1070                        && temps[args[2]].val == 0
   1071                        && temps[args[3]].val == 0) {
   1072                 /* Simplify LT/GE comparisons vs zero to a single compare
   1073                    vs the high word of the input.  */
   1074                 reset_all_temps(nb_temps);
   1075                 s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
   1076                 gen_args[0] = args[1];
   1077                 gen_args[1] = args[3];
   1078                 gen_args[2] = args[4];
   1079                 gen_args[3] = args[5];
   1080                 gen_args += 4;
   1081             } else {
   1082                 goto do_default;
   1083             }
   1084             args += 6;
   1085             break;
   1086 
   1087         case INDEX_op_setcond2_i32:
   1088             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
   1089             if (tmp != 2) {
   1090                 s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
   1091                 tcg_opt_gen_movi(gen_args, args[0], tmp);
   1092                 gen_args += 2;
   1093             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
   1094                        && temps[args[3]].state == TCG_TEMP_CONST
   1095                        && temps[args[4]].state == TCG_TEMP_CONST
   1096                        && temps[args[3]].val == 0
   1097                        && temps[args[4]].val == 0) {
   1098                 /* Simplify LT/GE comparisons vs zero to a single compare
   1099                    vs the high word of the input.  */
   1100                 s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
   1101                 reset_temp(args[0]);
   1102                 gen_args[0] = args[0];
   1103                 gen_args[1] = args[2];
   1104                 gen_args[2] = args[4];
   1105                 gen_args[3] = args[5];
   1106                 gen_args += 4;
   1107             } else {
   1108                 goto do_default;
   1109             }
   1110             args += 6;
   1111             break;
   1112 
   1113         case INDEX_op_call:
   1114             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
   1115             if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
   1116                                             TCG_CALL_NO_WRITE_GLOBALS))) {
   1117                 for (i = 0; i < nb_globals; i++) {
   1118                     reset_temp(i);
   1119                 }
   1120             }
   1121             for (i = 0; i < (args[0] >> 16); i++) {
   1122                 reset_temp(args[i + 1]);
   1123             }
   1124             i = nb_call_args + 3;
   1125             while (i) {
   1126                 *gen_args = *args;
   1127                 args++;
   1128                 gen_args++;
   1129                 i--;
   1130             }
   1131             break;
   1132 
   1133         default:
   1134         do_default:
   1135             /* Default case: we know nothing about operation (or were unable
   1136                to compute the operation result) so no propagation is done.
   1137                We trash everything if the operation is the end of a basic
   1138                block, otherwise we only trash the output args.  "mask" is
   1139                the non-zero bits mask for the first output arg.  */
   1140             if (def->flags & TCG_OPF_BB_END) {
   1141                 reset_all_temps(nb_temps);
   1142             } else {
   1143                 for (i = 0; i < def->nb_oargs; i++) {
   1144                     reset_temp(args[i]);
   1145                 }
   1146             }
   1147             for (i = 0; i < def->nb_args; i++) {
   1148                 gen_args[i] = args[i];
   1149             }
   1150             args += def->nb_args;
   1151             gen_args += def->nb_args;
   1152             break;
   1153         }
   1154     }
   1155 
   1156     return gen_args;
   1157 }
   1158 
   1159 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
   1160         TCGArg *args, TCGOpDef *tcg_op_defs)
   1161 {
   1162     TCGArg *res;
   1163     res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
   1164     return res;
   1165 }
   1166