Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                             guest_arm64_helpers.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2013-2013 OpenWorks
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_basictypes.h"
     32 #include "libvex_emnote.h"
     33 #include "libvex_guest_arm64.h"
     34 #include "libvex_ir.h"
     35 #include "libvex.h"
     36 
     37 #include "main_util.h"
     38 #include "main_globals.h"
     39 #include "guest_generic_bb_to_IR.h"
     40 #include "guest_arm64_defs.h"
     41 
     42 
/* This file contains helper functions for arm64 guest code.  Calls to
     44    these functions are generated by the back end.  These calls are of
     45    course in the host machine code and this file will be compiled to
     46    host machine code, so that all makes sense.
     47 
     48    Only change the signatures of these helper functions very
     49    carefully.  If you change the signature here, you'll have to change
     50    the parameters passed to it in the IR calls constructed by
     51    guest_arm64_toIR.c.
     52 */
     53 
     54 
     55 /* Set to 1 to get detailed profiling info about individual N, Z, C
     56    and V flag evaluation. */
     57 #define PROFILE_NZCV_FLAGS 0
     58 
     59 #if PROFILE_NZCV_FLAGS
     60 
     61 static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
     62 static UInt initted = 0;
     63 static UInt tot_evals = 0;
     64 
     65 static void initCounts ( void )
     66 {
     67    UInt i, j;
     68    for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
     69       for (j = 0; j < 16; j++) {
     70          tab_eval[i][j] = 0;
     71       }
     72    }
     73    initted = 1;
     74 }
     75 
     76 static void showCounts ( void )
     77 {
     78    const HChar* nameCC[16]
     79       = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
     80           "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
     81    UInt i, j;
     82    ULong sum = 0;
     83    vex_printf("\nCC_OP          0         1         2         3    "
     84               "     4         5         6\n");
     85    vex_printf(  "--------------------------------------------------"
     86               "--------------------------\n");
     87    for (j = 0; j < 16; j++) {
     88       vex_printf("%2d %s  ", j, nameCC[j]);
     89       for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
     90          vex_printf("%9d ", tab_eval[i][j]);
     91          sum += tab_eval[i][j];
     92       }
     93       vex_printf("\n");
     94    }
     95    vex_printf("(In total %llu calls)\n", sum);
     96 }
     97 
     98 #define NOTE_EVAL(_cc_op, _cond) \
     99    do { \
    100       if (!initted) initCounts(); \
    101       vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
    102       vassert( ((UInt)(_cond)) < 16); \
    103       tab_eval[(UInt)(_cc_op)][(UInt)(cond)]++;  \
    104       tot_evals++; \
    105       if (0 == (tot_evals & 0x7FFF)) \
    106         showCounts(); \
    107    } while (0)
    108 
    109 #endif /* PROFILE_NZCV_FLAGS */
    110 
    111 
    112 /* Calculate the N flag from the supplied thunk components, in the
    113    least significant bit of the word.  Returned bits 63:1 are zero. */
    114 static
    115 ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
    116                                 ULong cc_dep2, ULong cc_dep3 )
    117 {
    118    switch (cc_op) {
    119       case ARM64G_CC_OP_COPY: {
    120          /* (nzcv:28x0, unused, unused) */
    121          ULong nf   = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
    122          return nf;
    123       }
    124       case ARM64G_CC_OP_ADD32: {
    125          /* (argL, argR, unused) */
    126          UInt  argL = (UInt)cc_dep1;
    127          UInt  argR = (UInt)cc_dep2;
    128          UInt  res  = argL + argR;
    129          ULong nf   = (ULong)(res >> 31);
    130          return nf;
    131       }
    132       case ARM64G_CC_OP_ADD64: {
    133          /* (argL, argR, unused) */
    134          ULong argL = cc_dep1;
    135          ULong argR = cc_dep2;
    136          ULong res  = argL + argR;
    137          ULong nf   = (ULong)(res >> 63);
    138          return nf;
    139       }
    140       case ARM64G_CC_OP_SUB32: {
    141          /* (argL, argR, unused) */
    142          UInt  argL = (UInt)cc_dep1;
    143          UInt  argR = (UInt)cc_dep2;
    144          UInt  res  = argL - argR;
    145          ULong nf   = (ULong)(res >> 31);
    146          return nf;
    147       }
    148       case ARM64G_CC_OP_SUB64: {
    149          /* (argL, argR, unused) */
    150          ULong argL = cc_dep1;
    151          ULong argR = cc_dep2;
    152          ULong res  = argL - argR;
    153          ULong nf   = res >> 63;
    154          return nf;
    155       }
    156       case ARM64G_CC_OP_ADC32: {
    157          /* (argL, argR, oldC) */
    158          UInt  argL = cc_dep1;
    159          UInt  argR = cc_dep2;
    160          UInt  oldC = cc_dep3;
    161          vassert((oldC & ~1) == 0);
    162          UInt  res  = argL + argR + oldC;
    163          ULong nf   = res >> 31;
    164          return nf;
    165       }
    166       case ARM64G_CC_OP_ADC64: {
    167          /* (argL, argR, oldC) */
    168          ULong argL = cc_dep1;
    169          ULong argR = cc_dep2;
    170          ULong oldC = cc_dep3;
    171          vassert((oldC & ~1) == 0);
    172          ULong res  = argL + argR + oldC;
    173          ULong nf   = res >> 63;
    174          return nf;
    175       }
    176       case ARM64G_CC_OP_SBC32: {
    177          /* (argL, argR, oldC) */
    178          UInt  argL = cc_dep1;
    179          UInt  argR = cc_dep2;
    180          UInt  oldC = cc_dep3;
    181          vassert((oldC & ~1) == 0);
    182          UInt  res  = argL - argR - (oldC ^ 1);
    183          ULong nf   = res >> 31;
    184          return nf;
    185       }
    186       case ARM64G_CC_OP_SBC64: {
    187          /* (argL, argR, oldC) */
    188          ULong argL = cc_dep1;
    189          ULong argR = cc_dep2;
    190          ULong oldC = cc_dep3;
    191          vassert((oldC & ~1) == 0);
    192          ULong res  = argL - argR - (oldC ^ 1);
    193          ULong nf   = res >> 63;
    194          return nf;
    195       }
    196       case ARM64G_CC_OP_LOGIC32: {
    197          /* (res, unused, unused) */
    198          UInt  res = (UInt)cc_dep1;
    199          ULong nf  = res >> 31;
    200          return nf;
    201       }
    202       case ARM64G_CC_OP_LOGIC64: {
    203          /* (res, unused, unused) */
    204          ULong res = cc_dep1;
    205          ULong nf  = res >> 63;
    206          return nf;
    207       }
    208 //ZZ       case ARMG_CC_OP_MUL: {
    209 //ZZ          /* (res, unused, oldC:oldV) */
    210 //ZZ          UInt res  = cc_dep1;
    211 //ZZ          UInt nf   = res >> 31;
    212 //ZZ          return nf;
    213 //ZZ       }
    214 //ZZ       case ARMG_CC_OP_MULL: {
    215 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
    216 //ZZ          UInt resHi32 = cc_dep2;
    217 //ZZ          UInt nf      = resHi32 >> 31;
    218 //ZZ          return nf;
    219 //ZZ       }
    220       default:
    221          /* shouldn't really make these calls from generated code */
    222          vex_printf("arm64g_calculate_flag_n"
    223                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
    224                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
    225          vpanic("arm64g_calculate_flag_n");
    226    }
    227 }
    228 
    229 
    230 /* Calculate the Z flag from the supplied thunk components, in the
    231    least significant bit of the word.  Returned bits 63:1 are zero. */
    232 static
    233 ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
    234                                 ULong cc_dep2, ULong cc_dep3 )
    235 {
    236    switch (cc_op) {
    237       case ARM64G_CC_OP_COPY: {
    238          /* (nzcv:28x0, unused, unused) */
    239          ULong zf   = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
    240          return zf;
    241       }
    242       case ARM64G_CC_OP_ADD32: {
    243          /* (argL, argR, unused) */
    244          UInt  argL = (UInt)cc_dep1;
    245          UInt  argR = (UInt)cc_dep2;
    246          UInt  res  = argL + argR;
    247          ULong zf   = res == 0;
    248          return zf;
    249       }
    250       case ARM64G_CC_OP_ADD64: {
    251          /* (argL, argR, unused) */
    252          ULong argL = cc_dep1;
    253          ULong argR = cc_dep2;
    254          ULong res  = argL + argR;
    255          ULong zf   = res == 0;
    256          return zf;
    257       }
    258       case ARM64G_CC_OP_SUB32: {
    259          /* (argL, argR, unused) */
    260          UInt  argL = (UInt)cc_dep1;
    261          UInt  argR = (UInt)cc_dep2;
    262          UInt  res  = argL - argR;
    263          ULong zf   = res == 0;
    264          return zf;
    265       }
    266       case ARM64G_CC_OP_SUB64: {
    267          /* (argL, argR, unused) */
    268          ULong argL = cc_dep1;
    269          ULong argR = cc_dep2;
    270          ULong res  = argL - argR;
    271          ULong zf   = res == 0;
    272          return zf;
    273       }
    274       case ARM64G_CC_OP_ADC32: {
    275          /* (argL, argR, oldC) */
    276          UInt  argL = cc_dep1;
    277          UInt  argR = cc_dep2;
    278          UInt  oldC = cc_dep3;
    279          vassert((oldC & ~1) == 0);
    280          UInt  res  = argL + argR + oldC;
    281          ULong zf   = res == 0;
    282          return zf;
    283       }
    284       case ARM64G_CC_OP_ADC64: {
    285          /* (argL, argR, oldC) */
    286          ULong argL = cc_dep1;
    287          ULong argR = cc_dep2;
    288          ULong oldC = cc_dep3;
    289          vassert((oldC & ~1) == 0);
    290          ULong res  = argL + argR + oldC;
    291          ULong zf   = res == 0;
    292          return zf;
    293       }
    294       case ARM64G_CC_OP_SBC32: {
    295          /* (argL, argR, oldC) */
    296          UInt  argL = cc_dep1;
    297          UInt  argR = cc_dep2;
    298          UInt  oldC = cc_dep3;
    299          vassert((oldC & ~1) == 0);
    300          UInt  res  = argL - argR - (oldC ^ 1);
    301          ULong zf   = res == 0;
    302          return zf;
    303       }
    304       case ARM64G_CC_OP_SBC64: {
    305          /* (argL, argR, oldC) */
    306          ULong argL = cc_dep1;
    307          ULong argR = cc_dep2;
    308          ULong oldC = cc_dep3;
    309          vassert((oldC & ~1) == 0);
    310          ULong res  = argL - argR - (oldC ^ 1);
    311          ULong zf   = res == 0;
    312          return zf;
    313       }
    314       case ARM64G_CC_OP_LOGIC32: {
    315          /* (res, unused, unused) */
    316          UInt  res  = (UInt)cc_dep1;
    317          ULong zf   = res == 0;
    318          return zf;
    319       }
    320       case ARM64G_CC_OP_LOGIC64: {
    321          /* (res, unused, unused) */
    322          ULong res  = cc_dep1;
    323          ULong zf   = res == 0;
    324          return zf;
    325       }
    326 //ZZ       case ARMG_CC_OP_MUL: {
    327 //ZZ          /* (res, unused, oldC:oldV) */
    328 //ZZ          UInt res  = cc_dep1;
    329 //ZZ          UInt zf   = res == 0;
    330 //ZZ          return zf;
    331 //ZZ       }
    332 //ZZ       case ARMG_CC_OP_MULL: {
    333 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
    334 //ZZ          UInt resLo32 = cc_dep1;
    335 //ZZ          UInt resHi32 = cc_dep2;
    336 //ZZ          UInt zf      = (resHi32|resLo32) == 0;
    337 //ZZ          return zf;
    338 //ZZ       }
    339       default:
    340          /* shouldn't really make these calls from generated code */
    341          vex_printf("arm64g_calculate_flag_z"
    342                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
    343                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
    344          vpanic("arm64g_calculate_flag_z");
    345    }
    346 }
    347 
    348 
    349 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    350 /* Calculate the C flag from the supplied thunk components, in the
    351    least significant bit of the word.  Returned bits 63:1 are zero. */
    352 ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
    353                                 ULong cc_dep2, ULong cc_dep3 )
    354 {
    355    switch (cc_op) {
    356       case ARM64G_CC_OP_COPY: {
    357          /* (nzcv:28x0, unused, unused) */
    358          ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
    359          return cf;
    360       }
    361       case ARM64G_CC_OP_ADD32: {
    362          /* (argL, argR, unused) */
    363          UInt  argL = (UInt)cc_dep1;
    364          UInt  argR = (UInt)cc_dep2;
    365          UInt  res  = argL + argR;
    366          ULong cf   = res < argL;
    367          return cf;
    368       }
    369       case ARM64G_CC_OP_ADD64: {
    370          /* (argL, argR, unused) */
    371          ULong argL = cc_dep1;
    372          ULong argR = cc_dep2;
    373          ULong res  = argL + argR;
    374          ULong cf   = res < argL;
    375          return cf;
    376       }
    377       case ARM64G_CC_OP_SUB32: {
    378          /* (argL, argR, unused) */
    379          UInt  argL = (UInt)cc_dep1;
    380          UInt  argR = (UInt)cc_dep2;
    381          ULong cf   = argL >= argR;
    382          return cf;
    383       }
    384       case ARM64G_CC_OP_SUB64: {
    385          /* (argL, argR, unused) */
    386          ULong argL = cc_dep1;
    387          ULong argR = cc_dep2;
    388          ULong cf   = argL >= argR;
    389          return cf;
    390       }
    391       case ARM64G_CC_OP_ADC32: {
    392          /* (argL, argR, oldC) */
    393          UInt  argL = cc_dep1;
    394          UInt  argR = cc_dep2;
    395          UInt  oldC = cc_dep3;
    396          vassert((oldC & ~1) == 0);
    397          UInt  res  = argL + argR + oldC;
    398          ULong cf   = oldC ? (res <= argL) : (res < argL);
    399          return cf;
    400       }
    401       case ARM64G_CC_OP_ADC64: {
    402          /* (argL, argR, oldC) */
    403          ULong argL = cc_dep1;
    404          ULong argR = cc_dep2;
    405          ULong oldC = cc_dep3;
    406          vassert((oldC & ~1) == 0);
    407          ULong res  = argL + argR + oldC;
    408          ULong cf   = oldC ? (res <= argL) : (res < argL);
    409          return cf;
    410       }
    411       case ARM64G_CC_OP_SBC32: {
    412          /* (argL, argR, oldC) */
    413          UInt  argL = cc_dep1;
    414          UInt  argR = cc_dep2;
    415          UInt  oldC = cc_dep3;
    416          vassert((oldC & ~1) == 0);
    417          ULong cf   = oldC ? (argL >= argR) : (argL > argR);
    418          return cf;
    419       }
    420       case ARM64G_CC_OP_SBC64: {
    421          /* (argL, argR, oldC) */
    422          ULong argL = cc_dep1;
    423          ULong argR = cc_dep2;
    424          ULong oldC = cc_dep3;
    425          vassert((oldC & ~1) == 0);
    426          ULong cf   = oldC ? (argL >= argR) : (argL > argR);
    427          return cf;
    428       }
    429       case ARM64G_CC_OP_LOGIC32:
    430       case ARM64G_CC_OP_LOGIC64: {
    431          /* (res, unused, unused) */
    432          return 0; // C after logic is zero on arm64
    433       }
    434 //ZZ       case ARMG_CC_OP_MUL: {
    435 //ZZ          /* (res, unused, oldC:oldV) */
    436 //ZZ          UInt oldC = (cc_dep3 >> 1) & 1;
    437 //ZZ          vassert((cc_dep3 & ~3) == 0);
    438 //ZZ          UInt cf   = oldC;
    439 //ZZ          return cf;
    440 //ZZ       }
    441 //ZZ       case ARMG_CC_OP_MULL: {
    442 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
    443 //ZZ          UInt oldC    = (cc_dep3 >> 1) & 1;
    444 //ZZ          vassert((cc_dep3 & ~3) == 0);
    445 //ZZ          UInt cf      = oldC;
    446 //ZZ          return cf;
    447 //ZZ       }
    448       default:
    449          /* shouldn't really make these calls from generated code */
    450          vex_printf("arm64g_calculate_flag_c"
    451                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
    452                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
    453          vpanic("arm64g_calculate_flag_c");
    454    }
    455 }
    456 
    457 
    458 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    459 /* Calculate the V flag from the supplied thunk components, in the
    460    least significant bit of the word.  Returned bits 63:1 are zero. */
    461 static
    462 ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
    463                                 ULong cc_dep2, ULong cc_dep3 )
    464 {
    465    switch (cc_op) {
    466       case ARM64G_CC_OP_COPY: {
    467          /* (nzcv:28x0, unused, unused) */
    468          ULong vf   = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
    469          return vf;
    470       }
    471       case ARM64G_CC_OP_ADD32: {
    472          /* (argL, argR, unused) */
    473          UInt  argL = (UInt)cc_dep1;
    474          UInt  argR = (UInt)cc_dep2;
    475          UInt  res  = argL + argR;
    476          ULong vf   = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
    477          return vf;
    478       }
    479       case ARM64G_CC_OP_ADD64: {
    480          /* (argL, argR, unused) */
    481          ULong argL = cc_dep1;
    482          ULong argR = cc_dep2;
    483          ULong res  = argL + argR;
    484          ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
    485          return vf;
    486       }
    487       case ARM64G_CC_OP_SUB32: {
    488          /* (argL, argR, unused) */
    489          UInt  argL = (UInt)cc_dep1;
    490          UInt  argR = (UInt)cc_dep2;
    491          UInt  res  = argL - argR;
    492          ULong vf   = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
    493          return vf;
    494       }
    495       case ARM64G_CC_OP_SUB64: {
    496          /* (argL, argR, unused) */
    497          ULong argL = cc_dep1;
    498          ULong argR = cc_dep2;
    499          ULong res  = argL - argR;
    500          ULong vf   = (((argL ^ argR) & (argL ^ res))) >> 63;
    501          return vf;
    502       }
    503       case ARM64G_CC_OP_ADC32: {
    504          /* (argL, argR, oldC) */
    505          UInt  argL = cc_dep1;
    506          UInt  argR = cc_dep2;
    507          UInt  oldC = cc_dep3;
    508          vassert((oldC & ~1) == 0);
    509          UInt  res  = argL + argR + oldC;
    510          ULong vf   = ((res ^ argL) & (res ^ argR)) >> 31;
    511          return vf;
    512       }
    513       case ARM64G_CC_OP_ADC64: {
    514          /* (argL, argR, oldC) */
    515          ULong argL = cc_dep1;
    516          ULong argR = cc_dep2;
    517          ULong oldC = cc_dep3;
    518          vassert((oldC & ~1) == 0);
    519          ULong res  = argL + argR + oldC;
    520          ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
    521          return vf;
    522       }
    523       case ARM64G_CC_OP_SBC32: {
    524          /* (argL, argR, oldC) */
    525          UInt  argL = cc_dep1;
    526          UInt  argR = cc_dep2;
    527          UInt  oldC = cc_dep3;
    528          vassert((oldC & ~1) == 0);
    529          UInt  res  = argL - argR - (oldC ^ 1);
    530          ULong vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
    531          return vf;
    532       }
    533       case ARM64G_CC_OP_SBC64: {
    534          /* (argL, argR, oldC) */
    535          ULong argL = cc_dep1;
    536          ULong argR = cc_dep2;
    537          ULong oldC = cc_dep3;
    538          vassert((oldC & ~1) == 0);
    539          ULong res  = argL - argR - (oldC ^ 1);
    540          ULong vf   = ((argL ^ argR) & (argL ^ res)) >> 63;
    541          return vf;
    542       }
    543       case ARM64G_CC_OP_LOGIC32:
    544       case ARM64G_CC_OP_LOGIC64: {
    545          /* (res, unused, unused) */
    546          return 0; // V after logic is zero on arm64
    547       }
    548 //ZZ       case ARMG_CC_OP_MUL: {
    549 //ZZ          /* (res, unused, oldC:oldV) */
    550 //ZZ          UInt oldV = (cc_dep3 >> 0) & 1;
    551 //ZZ          vassert((cc_dep3 & ~3) == 0);
    552 //ZZ          UInt vf   = oldV;
    553 //ZZ          return vf;
    554 //ZZ       }
    555 //ZZ       case ARMG_CC_OP_MULL: {
    556 //ZZ          /* (resLo32, resHi32, oldC:oldV) */
    557 //ZZ          UInt oldV    = (cc_dep3 >> 0) & 1;
    558 //ZZ          vassert((cc_dep3 & ~3) == 0);
    559 //ZZ          UInt vf      = oldV;
    560 //ZZ          return vf;
    561 //ZZ       }
    562       default:
    563          /* shouldn't really make these calls from generated code */
    564          vex_printf("arm64g_calculate_flag_v"
    565                     "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
    566                     cc_op, cc_dep1, cc_dep2, cc_dep3 );
    567          vpanic("arm64g_calculate_flag_v");
    568    }
    569 }
    570 
    571 
    572 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    573 /* Calculate NZCV from the supplied thunk components, in the positions
    574    they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
    575    Returned bits 27:0 are zero. */
    576 ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
    577                                     ULong cc_dep2, ULong cc_dep3 )
    578 {
    579    ULong f;
    580    ULong res = 0;
    581    f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
    582    res |= (f << ARM64G_CC_SHIFT_N);
    583    f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
    584    res |= (f << ARM64G_CC_SHIFT_Z);
    585    f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
    586    res |= (f << ARM64G_CC_SHIFT_C);
    587    f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
    588    res |= (f << ARM64G_CC_SHIFT_V);
    589    return res;
    590 }
    591 
    592 //ZZ
    593 //ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    594 //ZZ /* Calculate the QC flag from the arguments, in the lowest bit
    595 //ZZ    of the word (bit 0).  Urr, having this out of line is bizarre.
    596 //ZZ    Push back inline. */
    597 //ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
    598 //ZZ                               UInt resR1, UInt resR2 )
    599 //ZZ {
    600 //ZZ    if (resL1 != resR1 || resL2 != resR2)
    601 //ZZ       return 1;
    602 //ZZ    else
    603 //ZZ       return 0;
    604 //ZZ }
    605 
    606 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
    607 /* Calculate the specified condition from the thunk components, in the
    608    lowest bit of the word (bit 0).  Returned bits 63:1 are zero. */
    609 ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
    610                                    ULong cond_n_op ,
    611                                    ULong cc_dep1,
    612                                    ULong cc_dep2, ULong cc_dep3 )
    613 {
    614    ULong cond  = cond_n_op >> 4;
    615    ULong cc_op = cond_n_op & 0xF;
    616    ULong inv   = cond & 1;
    617    ULong nf, zf, vf, cf;
    618 
    619 #  if PROFILE_NZCV_FLAGS
    620    NOTE_EVAL(cc_op, cond);
    621 #  endif
    622 
    623    //   vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
    624    //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);
    625 
    626    switch (cond) {
    627       case ARM64CondEQ:    // Z=1         => z
    628       case ARM64CondNE:    // Z=0
    629          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
    630          return inv ^ zf;
    631 
    632       case ARM64CondCS:    // C=1         => c
    633       case ARM64CondCC:    // C=0
    634          cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
    635          return inv ^ cf;
    636 
    637       case ARM64CondMI:    // N=1         => n
    638       case ARM64CondPL:    // N=0
    639          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
    640          return inv ^ nf;
    641 
    642       case ARM64CondVS:    // V=1         => v
    643       case ARM64CondVC:    // V=0
    644          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
    645          return inv ^ vf;
    646 
    647       case ARM64CondHI:    // C=1 && Z=0   => c & ~z
    648       case ARM64CondLS:    // C=0 || Z=1
    649          cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
    650          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
    651          return inv ^ (1 & (cf & ~zf));
    652 
    653       case ARM64CondGE:    // N=V          => ~(n^v)
    654       case ARM64CondLT:    // N!=V
    655          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
    656          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
    657          return inv ^ (1 & ~(nf ^ vf));
    658 
    659       case ARM64CondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
    660       case ARM64CondLE:    // Z=1 || N!=V
    661          nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
    662          vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
    663          zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
    664          return inv ^ (1 & ~(zf | (nf ^ vf)));
    665 
    666       case ARM64CondAL:    // 1
    667       case ARM64CondNV:    // 1
    668          return 1;
    669 
    670       default:
    671          /* shouldn't really make these calls from generated code */
    672          vex_printf("arm64g_calculate_condition(ARM64)"
    673                     "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
    674                     cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
    675          vpanic("armg_calculate_condition(ARM64)");
    676    }
    677 }
    678 
    679 
    680 /*---------------------------------------------------------------*/
    681 /*--- Flag-helpers translation-time function specialisers.    ---*/
    682 /*--- These help iropt specialise calls the above run-time    ---*/
    683 /*--- flags functions.                                        ---*/
    684 /*---------------------------------------------------------------*/
    685 
    686 /* Used by the optimiser to try specialisations.  Returns an
    687    equivalent expression, or NULL if none. */
    688 
    689 static Bool isU64 ( IRExpr* e, ULong n )
    690 {
    691    return
    692       toBool( e->tag == Iex_Const
    693               && e->Iex.Const.con->tag == Ico_U64
    694               && e->Iex.Const.con->Ico.U64 == n );
    695 }
    696 
    697 IRExpr* guest_arm64_spechelper ( const HChar* function_name,
    698                                  IRExpr** args,
    699                                  IRStmt** precedingStmts,
    700                                  Int      n_precedingStmts )
    701 {
    702 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
    703 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
    704 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
    705 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
    706 
    707    Int i, arity = 0;
    708    for (i = 0; args[i]; i++)
    709       arity++;
    710 //ZZ #  if 0
    711 //ZZ    vex_printf("spec request:\n");
    712 //ZZ    vex_printf("   %s  ", function_name);
    713 //ZZ    for (i = 0; i < arity; i++) {
    714 //ZZ       vex_printf("  ");
    715 //ZZ       ppIRExpr(args[i]);
    716 //ZZ    }
    717 //ZZ    vex_printf("\n");
    718 //ZZ #  endif
    719 
    720    /* --------- specialising "arm64g_calculate_condition" --------- */
    721 
    722    if (vex_streq(function_name, "arm64g_calculate_condition")) {
    723 
    724       /* specialise calls to the "arm64g_calculate_condition" function.
    725          Not sure whether this is strictly necessary, but: the
    726          replacement IR must produce only the values 0 or 1.  Bits
    727          63:1 are required to be zero. */
    728       IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_ndep;
    729       vassert(arity == 4);
    730       cond_n_op = args[0]; /* (ARM64Condcode << 4)  |  ARM64G_CC_OP_* */
    731       cc_dep1   = args[1];
    732       cc_dep2   = args[2];
    733       cc_ndep   = args[3];
    734 
    735       /*---------------- SUB64 ----------------*/
    736 
    737       /* 0, 1 */
    738       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
    739          /* EQ after SUB --> test argL == argR */
    740          return unop(Iop_1Uto64,
    741                      binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
    742       }
    743       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
    744          /* NE after SUB --> test argL != argR */
    745          return unop(Iop_1Uto64,
    746                      binop(Iop_CmpNE64, cc_dep1, cc_dep2));
    747       }
    748 
    749       /* 2, 3 */
    750       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
    751          /* CS after SUB --> test argL >=u argR
    752                          --> test argR <=u argL */
    753          return unop(Iop_1Uto64,
    754                      binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
    755       }
    756       if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
    757          /* CC after SUB --> test argL <u argR */
    758          return unop(Iop_1Uto64,
    759                      binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
    760       }
    761 
    762       /* 8, 9 */
    763       if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
    764          /* LS after SUB --> test argL <=u argR */
    765          return unop(Iop_1Uto64,
    766                      binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
    767       }
    768       if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
    769          /* HI after SUB --> test argL >u argR
    770                          --> test argR <u argL */
    771          return unop(Iop_1Uto64,
    772                      binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
    773       }
    774 
    775       /* 10, 11 */
    776       if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
    777          /* LT after SUB --> test argL <s argR */
    778          return unop(Iop_1Uto64,
    779                      binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
    780       }
    781       if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
    782          /* GE after SUB --> test argL >=s argR
    783                          --> test argR <=s argL */
    784          return unop(Iop_1Uto64,
    785                      binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
    786       }
    787 
    788       /* 12, 13 */
    789       if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
    790          /* GT after SUB --> test argL >s argR
    791                          --> test argR <s argL */
    792          return unop(Iop_1Uto64,
    793                      binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
    794       }
    795       if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
    796          /* LE after SUB --> test argL <=s argR */
    797          return unop(Iop_1Uto64,
    798                      binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
    799       }
    800 
    801       /*---------------- SUB32 ----------------*/
    802 
    803       /* 0, 1 */
    804       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
    805          /* EQ after SUB --> test argL == argR */
    806          return unop(Iop_1Uto64,
    807                      binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
    808                                         unop(Iop_64to32, cc_dep2)));
    809       }
    810       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
    811          /* NE after SUB --> test argL != argR */
    812          return unop(Iop_1Uto64,
    813                      binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
    814                                         unop(Iop_64to32, cc_dep2)));
    815       }
    816 
    817       /* 2, 3 */
    818       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
    819          /* CS after SUB --> test argL >=u argR
    820                          --> test argR <=u argL */
    821          return unop(Iop_1Uto64,
    822                      binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
    823                                          unop(Iop_64to32, cc_dep1)));
    824       }
    825       if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
    826          /* CC after SUB --> test argL <u argR */
    827          return unop(Iop_1Uto64,
    828                      binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
    829                                          unop(Iop_64to32, cc_dep2)));
    830       }
    831 
    832       /* 8, 9 */
    833       if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
    834          /* LS after SUB --> test argL <=u argR */
    835          return unop(Iop_1Uto64,
    836                      binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
    837                                          unop(Iop_64to32, cc_dep2)));
    838       }
    839       if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
    840          /* HI after SUB --> test argL >u argR
    841                          --> test argR <u argL */
    842          return unop(Iop_1Uto64,
    843                      binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
    844                                          unop(Iop_64to32, cc_dep1)));
    845       }
    846 
    847       /* 10, 11 */
    848       if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
    849          /* LT after SUB --> test argL <s argR */
    850          return unop(Iop_1Uto64,
    851                      binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
    852                                          unop(Iop_64to32, cc_dep2)));
    853       }
    854       if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
    855          /* GE after SUB --> test argL >=s argR
    856                          --> test argR <=s argL */
    857          return unop(Iop_1Uto64,
    858                      binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
    859                                          unop(Iop_64to32, cc_dep1)));
    860       }
    861 
    862       /* 12, 13 */
    863       if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
    864          /* GT after SUB --> test argL >s argR
    865                          --> test argR <s argL */
    866          return unop(Iop_1Uto64,
    867                      binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
    868                                          unop(Iop_64to32, cc_dep1)));
    869       }
    870       if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
    871          /* LE after SUB --> test argL <=s argR */
    872          return unop(Iop_1Uto64,
    873                      binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
    874                                          unop(Iop_64to32, cc_dep2)));
    875       }
    876 
    877       /*---------------- SBC64 ----------------*/
    878 
    879       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SBC64)) {
    880          /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
    881          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
    882          /* HS after SBC (same as C after SBC below)
    883             --> oldC ? (argL >=u argR) : (argL >u argR)
    884             --> oldC ? (argR <=u argL) : (argR <u argL)
    885          */
    886          return
    887             IRExpr_ITE(
    888                binop(Iop_CmpNE64, cc_ndep, mkU64(0)),
    889                /* case oldC != 0 */
    890                unop(Iop_1Uto64, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
    891                /* case oldC == 0 */
    892                unop(Iop_1Uto64, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
    893             );
    894       }
    895 
    896       /*---------------- SBC32 ----------------*/
    897 
    898       if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SBC32)) {
    899          /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
    900          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
    901          /* HS after SBC (same as C after SBC below)
    902             --> oldC ? (argL >=u argR) : (argL >u argR)
    903             --> oldC ? (argR <=u argL) : (argR <u argL)
    904          */
    905          return
    906             IRExpr_ITE(
    907                binop(Iop_CmpNE64, cc_ndep, mkU64(0)),
    908                /* case oldC != 0 */
    909                unop(Iop_1Uto64, binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
    910                                                     unop(Iop_64to32, cc_dep1))),
    911                /* case oldC == 0 */
    912                unop(Iop_1Uto64, binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
    913                                                     unop(Iop_64to32, cc_dep1)))
    914             );
    915       }
    916 
    917 //ZZ       /*---------------- LOGIC ----------------*/
    918 //ZZ
    919 //ZZ       if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
    920 //ZZ          /* EQ after LOGIC --> test res == 0 */
    921 //ZZ          return unop(Iop_1Uto32,
    922 //ZZ                      binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
    923 //ZZ       }
    924 //ZZ       if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
    925 //ZZ          /* NE after LOGIC --> test res != 0 */
    926 //ZZ          return unop(Iop_1Uto32,
    927 //ZZ                      binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
    928 //ZZ       }
    929 //ZZ
    930 //ZZ       if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
    931 //ZZ          /* PL after LOGIC --> test (res >> 31) == 0 */
    932 //ZZ          return unop(Iop_1Uto32,
    933 //ZZ                      binop(Iop_CmpEQ32,
    934 //ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
    935 //ZZ                            mkU32(0)));
    936 //ZZ       }
    937 //ZZ       if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
    938 //ZZ          /* MI after LOGIC --> test (res >> 31) == 1 */
    939 //ZZ          return unop(Iop_1Uto32,
    940 //ZZ                      binop(Iop_CmpEQ32,
    941 //ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
    942 //ZZ                            mkU32(1)));
    943 //ZZ       }
    944 
    945       /*---------------- COPY ----------------*/
    946 
    947       if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
    948          /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
    949          return binop(Iop_And64,
    950                       binop(Iop_Shr64, cc_dep1,
    951                                        mkU8(ARM64G_CC_SHIFT_Z)),
    952                       mkU64(1));
    953       }
    954       if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
    955          /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
    956          return binop(Iop_And64,
    957                       binop(Iop_Xor64,
    958                             binop(Iop_Shr64, cc_dep1,
    959                                              mkU8(ARM64G_CC_SHIFT_Z)),
    960                             mkU64(1)),
    961                       mkU64(1));
    962       }
    963 
    964 //ZZ       /*----------------- AL -----------------*/
    965 //ZZ
    966 //ZZ       /* A critically important case for Thumb code.
    967 //ZZ
    968 //ZZ          What we're trying to spot is the case where cond_n_op is an
    969 //ZZ          expression of the form Or32(..., 0xE0) since that means the
    970 //ZZ          caller is asking for CondAL and we can simply return 1
    971 //ZZ          without caring what the ... part is.  This is a potentially
    972 //ZZ          dodgy kludge in that it assumes that the ... part has zeroes
    973 //ZZ          in bits 7:4, so that the result of the Or32 is guaranteed to
    974 //ZZ          be 0xE in bits 7:4.  Given that the places where this first
    975 //ZZ          arg are constructed (in guest_arm_toIR.c) are very
    976 //ZZ          constrained, we can get away with this.  To make this
    977 //ZZ          guaranteed safe would require to have a new primop, Slice44
    978 //ZZ          or some such, thusly
    979 //ZZ
    980 //ZZ          Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
    981 //ZZ
    982 //ZZ          and we would then look for Slice44(0xE0, ...)
    983 //ZZ          which would give the required safety property.
    984 //ZZ
    985 //ZZ          It would be infeasibly expensive to scan backwards through
    986 //ZZ          the entire block looking for an assignment to the temp, so
    987 //ZZ          just look at the previous 16 statements.  That should find it
    988 //ZZ          if it is an interesting case, as a result of how the
    989 //ZZ          boilerplate guff at the start of each Thumb insn translation
    990 //ZZ          is made.
    991 //ZZ       */
    992 //ZZ       if (cond_n_op->tag == Iex_RdTmp) {
    993 //ZZ          Int    j;
    994 //ZZ          IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
    995 //ZZ          Int    limit    = n_precedingStmts - 16;
    996 //ZZ          if (limit < 0) limit = 0;
    997 //ZZ          if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
    998 //ZZ          for (j = n_precedingStmts - 1; j >= limit; j--) {
    999 //ZZ             IRStmt* st = precedingStmts[j];
   1000 //ZZ             if (st->tag == Ist_WrTmp
   1001 //ZZ                 && st->Ist.WrTmp.tmp == look_for
   1002 //ZZ                 && st->Ist.WrTmp.data->tag == Iex_Binop
   1003 //ZZ                 && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
   1004 //ZZ                 && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
   1005 //ZZ                return mkU32(1);
   1006 //ZZ          }
   1007 //ZZ          /* Didn't find any useful binding to the first arg
   1008 //ZZ             in the previous 16 stmts. */
   1009 //ZZ       }
   1010    }
   1011 
   1012 //ZZ    /* --------- specialising "armg_calculate_flag_c" --------- */
   1013 //ZZ
   1014 //ZZ    else
   1015 //ZZ    if (vex_streq(function_name, "armg_calculate_flag_c")) {
   1016 //ZZ
   1017 //ZZ       /* specialise calls to the "armg_calculate_flag_c" function.
   1018 //ZZ          Note that the returned value must be either 0 or 1; nonzero
   1019 //ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
   1020 //ZZ          values (from the thunk) are assumed to have bits 31:1
   1021 //ZZ          clear. */
   1022 //ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
   1023 //ZZ       vassert(arity == 4);
   1024 //ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
   1025 //ZZ       cc_dep1 = args[1];
   1026 //ZZ       cc_dep2 = args[2];
   1027 //ZZ       cc_ndep = args[3];
   1028 //ZZ
   1029 //ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
   1030 //ZZ          /* Thunk args are (result, shco, oldV) */
   1031 //ZZ          /* C after LOGIC --> shco */
   1032 //ZZ          return cc_dep2;
   1033 //ZZ       }
   1034 //ZZ
   1035 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
   1036 //ZZ          /* Thunk args are (argL, argR, unused) */
   1037 //ZZ          /* C after SUB --> argL >=u argR
   1038 //ZZ                         --> argR <=u argL */
   1039 //ZZ          return unop(Iop_1Uto32,
   1040 //ZZ                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
   1041 //ZZ       }
   1042 //ZZ
   1043 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
   1044 //ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
   1045 //ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
   1046 //ZZ          /* C after SBB (same as HS after SBB above)
   1047 //ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
   1048 //ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
   1049 //ZZ          */
   1050 //ZZ          return
   1051 //ZZ             IRExpr_ITE(
   1052 //ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
   1053 //ZZ                /* case oldC != 0 */
   1054 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
   1055 //ZZ                /* case oldC == 0 */
   1056 //ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
   1057 //ZZ             );
   1058 //ZZ       }
   1059 //ZZ
   1060 //ZZ    }
   1061 //ZZ
   1062 //ZZ    /* --------- specialising "armg_calculate_flag_v" --------- */
   1063 //ZZ
   1064 //ZZ    else
   1065 //ZZ    if (vex_streq(function_name, "armg_calculate_flag_v")) {
   1066 //ZZ
   1067 //ZZ       /* specialise calls to the "armg_calculate_flag_v" function.
   1068 //ZZ          Note that the returned value must be either 0 or 1; nonzero
   1069 //ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
   1070 //ZZ          values (from the thunk) are assumed to have bits 31:1
   1071 //ZZ          clear. */
   1072 //ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
   1073 //ZZ       vassert(arity == 4);
   1074 //ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
   1075 //ZZ       cc_dep1 = args[1];
   1076 //ZZ       cc_dep2 = args[2];
   1077 //ZZ       cc_ndep = args[3];
   1078 //ZZ
   1079 //ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
   1080 //ZZ          /* Thunk args are (result, shco, oldV) */
   1081 //ZZ          /* V after LOGIC --> oldV */
   1082 //ZZ          return cc_ndep;
   1083 //ZZ       }
   1084 //ZZ
   1085 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
   1086 //ZZ          /* Thunk args are (argL, argR, unused) */
   1087 //ZZ          /* V after SUB
   1088 //ZZ             --> let res = argL - argR
   1089 //ZZ                 in ((argL ^ argR) & (argL ^ res)) >> 31
   1090 //ZZ             --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
   1091 //ZZ          */
   1092 //ZZ          IRExpr* argL = cc_dep1;
   1093 //ZZ          IRExpr* argR = cc_dep2;
   1094 //ZZ          return
   1095 //ZZ             binop(Iop_Shr32,
   1096 //ZZ                   binop(Iop_And32,
   1097 //ZZ                         binop(Iop_Xor32, argL, argR),
   1098 //ZZ                         binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
   1099 //ZZ                   ),
   1100 //ZZ                   mkU8(31)
   1101 //ZZ             );
   1102 //ZZ       }
   1103 //ZZ
   1104 //ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
   1105 //ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
   1106 //ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
   1107 //ZZ          /* V after SBB
   1108 //ZZ             --> let res = argL - argR - (oldC ^ 1)
   1109 //ZZ                 in  (argL ^ argR) & (argL ^ res) & 1
   1110 //ZZ          */
   1111 //ZZ          return
   1112 //ZZ             binop(
   1113 //ZZ                Iop_And32,
   1114 //ZZ                binop(
   1115 //ZZ                   Iop_And32,
   1116 //ZZ                   // argL ^ argR
   1117 //ZZ                   binop(Iop_Xor32, cc_dep1, cc_dep2),
   1118 //ZZ                   // argL ^ (argL - argR - (oldC ^ 1))
   1119 //ZZ                   binop(Iop_Xor32,
   1120 //ZZ                         cc_dep1,
   1121 //ZZ                         binop(Iop_Sub32,
   1122 //ZZ                               binop(Iop_Sub32, cc_dep1, cc_dep2),
   1123 //ZZ                               binop(Iop_Xor32, cc_ndep, mkU32(1)))
   1124 //ZZ                   )
   1125 //ZZ                ),
   1126 //ZZ                mkU32(1)
   1127 //ZZ             );
   1128 //ZZ       }
   1129 //ZZ
   1130 //ZZ    }
   1131 
   1132 #  undef unop
   1133 #  undef binop
   1134 #  undef mkU64
   1135 #  undef mkU8
   1136 
   1137    return NULL;
   1138 }
   1139 
   1140 
   1141 /*----------------------------------------------*/
   1142 /*--- The exported fns ..                    ---*/
   1143 /*----------------------------------------------*/
   1144 
   1145 //ZZ /* VISIBLE TO LIBVEX CLIENT */
   1146 //ZZ #if 0
   1147 //ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
   1148 //ZZ                                  /*OUT*/VexGuestARMState* vex_state )
   1149 //ZZ {
   1150 //ZZ    vassert(0); // FIXME
   1151 //ZZ
   1152 //ZZ    /* Mask out everything except N Z V C. */
   1153 //ZZ    flags_native
   1154 //ZZ       &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
   1155 //ZZ
   1156 //ZZ    vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
   1157 //ZZ    vex_state->guest_CC_DEP1 = flags_native;
   1158 //ZZ    vex_state->guest_CC_DEP2 = 0;
   1159 //ZZ    vex_state->guest_CC_NDEP = 0;
   1160 //ZZ }
   1161 //ZZ #endif
   1162 
   1163 /* VISIBLE TO LIBVEX CLIENT */
   1164 ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
   1165 {
   1166    ULong nzcv = 0;
   1167    // NZCV
   1168    nzcv |= arm64g_calculate_flags_nzcv(
   1169                vex_state->guest_CC_OP,
   1170                vex_state->guest_CC_DEP1,
   1171                vex_state->guest_CC_DEP2,
   1172                vex_state->guest_CC_NDEP
   1173             );
   1174    vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
   1175 //ZZ    // Q
   1176 //ZZ    if (vex_state->guest_QFLAG32 > 0)
   1177 //ZZ       cpsr |= (1 << 27);
   1178 //ZZ    // GE
   1179 //ZZ    if (vex_state->guest_GEFLAG0 > 0)
   1180 //ZZ       cpsr |= (1 << 16);
   1181 //ZZ    if (vex_state->guest_GEFLAG1 > 0)
   1182 //ZZ       cpsr |= (1 << 17);
   1183 //ZZ    if (vex_state->guest_GEFLAG2 > 0)
   1184 //ZZ       cpsr |= (1 << 18);
   1185 //ZZ    if (vex_state->guest_GEFLAG3 > 0)
   1186 //ZZ       cpsr |= (1 << 19);
   1187 //ZZ    // M
   1188 //ZZ    cpsr |= (1 << 4); // 0b10000 means user-mode
   1189 //ZZ    // J,T   J (bit 24) is zero by initialisation above
   1190 //ZZ    // T  we copy from R15T[0]
   1191 //ZZ    if (vex_state->guest_R15T & 1)
   1192 //ZZ       cpsr |= (1 << 5);
   1193 //ZZ    // ITSTATE we punt on for the time being.  Could compute it
   1194 //ZZ    // if needed though.
   1195 //ZZ    // E, endianness, 0 (littleendian) from initialisation above
   1196 //ZZ    // A,I,F disable some async exceptions.  Not sure about these.
   1197 //ZZ    // Leave as zero for the time being.
   1198    return nzcv;
   1199 }
   1200 
   1201 /* VISIBLE TO LIBVEX CLIENT */
   1202 void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
   1203 {
   1204    vex_bzero(vex_state, sizeof(*vex_state));
   1205 //ZZ    vex_state->host_EvC_FAILADDR = 0;
   1206 //ZZ    vex_state->host_EvC_COUNTER = 0;
   1207 //ZZ
   1208 //ZZ    vex_state->guest_R0  = 0;
   1209 //ZZ    vex_state->guest_R1  = 0;
   1210 //ZZ    vex_state->guest_R2  = 0;
   1211 //ZZ    vex_state->guest_R3  = 0;
   1212 //ZZ    vex_state->guest_R4  = 0;
   1213 //ZZ    vex_state->guest_R5  = 0;
   1214 //ZZ    vex_state->guest_R6  = 0;
   1215 //ZZ    vex_state->guest_R7  = 0;
   1216 //ZZ    vex_state->guest_R8  = 0;
   1217 //ZZ    vex_state->guest_R9  = 0;
   1218 //ZZ    vex_state->guest_R10 = 0;
   1219 //ZZ    vex_state->guest_R11 = 0;
   1220 //ZZ    vex_state->guest_R12 = 0;
   1221 //ZZ    vex_state->guest_R13 = 0;
   1222 //ZZ    vex_state->guest_R14 = 0;
   1223 //ZZ    vex_state->guest_R15T = 0;  /* NB: implies ARM mode */
   1224 //ZZ
   1225    vex_state->guest_CC_OP   = ARM64G_CC_OP_COPY;
   1226 //ZZ    vex_state->guest_CC_DEP1 = 0;
   1227 //ZZ    vex_state->guest_CC_DEP2 = 0;
   1228 //ZZ    vex_state->guest_CC_NDEP = 0;
   1229 //ZZ    vex_state->guest_QFLAG32 = 0;
   1230 //ZZ    vex_state->guest_GEFLAG0 = 0;
   1231 //ZZ    vex_state->guest_GEFLAG1 = 0;
   1232 //ZZ    vex_state->guest_GEFLAG2 = 0;
   1233 //ZZ    vex_state->guest_GEFLAG3 = 0;
   1234 //ZZ
   1235 //ZZ    vex_state->guest_EMNOTE  = EmNote_NONE;
   1236 //ZZ    vex_state->guest_CMSTART = 0;
   1237 //ZZ    vex_state->guest_CMLEN   = 0;
   1238 //ZZ    vex_state->guest_NRADDR  = 0;
   1239 //ZZ    vex_state->guest_IP_AT_SYSCALL = 0;
   1240 //ZZ
   1241 //ZZ    vex_state->guest_D0  = 0;
   1242 //ZZ    vex_state->guest_D1  = 0;
   1243 //ZZ    vex_state->guest_D2  = 0;
   1244 //ZZ    vex_state->guest_D3  = 0;
   1245 //ZZ    vex_state->guest_D4  = 0;
   1246 //ZZ    vex_state->guest_D5  = 0;
   1247 //ZZ    vex_state->guest_D6  = 0;
   1248 //ZZ    vex_state->guest_D7  = 0;
   1249 //ZZ    vex_state->guest_D8  = 0;
   1250 //ZZ    vex_state->guest_D9  = 0;
   1251 //ZZ    vex_state->guest_D10 = 0;
   1252 //ZZ    vex_state->guest_D11 = 0;
   1253 //ZZ    vex_state->guest_D12 = 0;
   1254 //ZZ    vex_state->guest_D13 = 0;
   1255 //ZZ    vex_state->guest_D14 = 0;
   1256 //ZZ    vex_state->guest_D15 = 0;
   1257 //ZZ    vex_state->guest_D16 = 0;
   1258 //ZZ    vex_state->guest_D17 = 0;
   1259 //ZZ    vex_state->guest_D18 = 0;
   1260 //ZZ    vex_state->guest_D19 = 0;
   1261 //ZZ    vex_state->guest_D20 = 0;
   1262 //ZZ    vex_state->guest_D21 = 0;
   1263 //ZZ    vex_state->guest_D22 = 0;
   1264 //ZZ    vex_state->guest_D23 = 0;
   1265 //ZZ    vex_state->guest_D24 = 0;
   1266 //ZZ    vex_state->guest_D25 = 0;
   1267 //ZZ    vex_state->guest_D26 = 0;
   1268 //ZZ    vex_state->guest_D27 = 0;
   1269 //ZZ    vex_state->guest_D28 = 0;
   1270 //ZZ    vex_state->guest_D29 = 0;
   1271 //ZZ    vex_state->guest_D30 = 0;
   1272 //ZZ    vex_state->guest_D31 = 0;
   1273 //ZZ
   1274 //ZZ    /* ARM encoded; zero is the default as it happens (result flags
   1275 //ZZ       (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
   1276 //ZZ       all exns masked, all exn sticky bits cleared). */
   1277 //ZZ    vex_state->guest_FPSCR = 0;
   1278 //ZZ
   1279 //ZZ    vex_state->guest_TPIDRURO = 0;
   1280 //ZZ
   1281 //ZZ    /* Not in a Thumb IT block. */
   1282 //ZZ    vex_state->guest_ITSTATE = 0;
   1283 //ZZ
   1284 //ZZ    vex_state->padding1 = 0;
   1285 //ZZ    vex_state->padding2 = 0;
   1286 //ZZ    vex_state->padding3 = 0;
   1287 //ZZ    vex_state->padding4 = 0;
   1288 //ZZ    vex_state->padding5 = 0;
   1289 }
   1290 
   1291 
   1292 /*-----------------------------------------------------------*/
   1293 /*--- Describing the arm guest state, for the benefit     ---*/
   1294 /*--- of iropt and instrumenters.                         ---*/
   1295 /*-----------------------------------------------------------*/
   1296 
   1297 /* Figure out if any part of the guest state contained in minoff
   1298    .. maxoff requires precise memory exceptions.  If in doubt return
   1299    True (but this generates significantly slower code).
   1300 
   1301    We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
   1302    That might be overkill (for 29 and 30); I don't know.
   1303 */
   1304 Bool guest_arm64_state_requires_precise_mem_exns ( Int minoff,
   1305                                                    Int maxoff)
   1306 {
   1307    Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
   1308    Int xsp_max = xsp_min + 8 - 1;
   1309    Int pc_min  = offsetof(VexGuestARM64State, guest_PC);
   1310    Int pc_max  = pc_min + 8 - 1;
   1311 
   1312    if (maxoff < xsp_min || minoff > xsp_max) {
   1313       /* no overlap with xsp */
   1314       if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
   1315          return False; // We only need to check stack pointer.
   1316    } else {
   1317       return True;
   1318    }
   1319 
   1320    if (maxoff < pc_min || minoff > pc_max) {
   1321       /* no overlap with pc */
   1322    } else {
   1323       return True;
   1324    }
   1325 
   1326    /* Guessing that we need PX for FP, but I don't really know. */
   1327    Int x29_min = offsetof(VexGuestARM64State, guest_X29);
   1328    Int x29_max = x29_min + 8 - 1;
   1329 
   1330    if (maxoff < x29_min || minoff > x29_max) {
   1331       /* no overlap with x29 */
   1332    } else {
   1333       return True;
   1334    }
   1335 
   1336    /* Guessing that we need PX for LR, but I don't really know. */
   1337    Int x30_min = offsetof(VexGuestARM64State, guest_X30);
   1338    Int x30_max = x30_min + 8 - 1;
   1339 
   1340    if (maxoff < x30_min || minoff > x30_max) {
   1341       /* no overlap with r30 */
   1342    } else {
   1343       return True;
   1344    }
   1345 
   1346    return False;
   1347 }
   1348 
   1349 
   1350 #define ALWAYSDEFD(field)                             \
   1351     { offsetof(VexGuestARM64State, field),            \
   1352       (sizeof ((VexGuestARM64State*)0)->field) }
   1353 VexGuestLayout
   1354    arm64Guest_layout
   1355       = {
   1356           /* Total size of the guest state, in bytes. */
   1357           .total_sizeB = sizeof(VexGuestARM64State),
   1358 
   1359           /* Describe the stack pointer. */
   1360           .offset_SP = offsetof(VexGuestARM64State,guest_XSP),
   1361           .sizeof_SP = 8,
   1362 
   1363           /* Describe the instruction pointer. */
   1364           .offset_IP = offsetof(VexGuestARM64State,guest_PC),
   1365           .sizeof_IP = 8,
   1366 
   1367           /* Describe any sections to be regarded by Memcheck as
   1368              'always-defined'. */
   1369           .n_alwaysDefd = 10,
   1370 
   1371           /* flags thunk: OP is always defd, whereas DEP1 and DEP2
   1372              have to be tracked.  See detailed comment in gdefs.h on
   1373              meaning of thunk fields. */
   1374           .alwaysDefd
   1375              = { /* 0 */ ALWAYSDEFD(guest_PC),
   1376                  /* 1 */ ALWAYSDEFD(guest_CC_OP),
   1377                  /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
   1378                  /* 3 */ ALWAYSDEFD(guest_EMNOTE),
   1379                  /* 4 */ ALWAYSDEFD(guest_CMSTART),
   1380                  /* 5 */ ALWAYSDEFD(guest_CMLEN),
   1381                  /* 6 */ ALWAYSDEFD(guest_NRADDR),
   1382                  /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
   1383                  /* 8 */ ALWAYSDEFD(guest_FPCR),
   1384                  /* 9 */ ALWAYSDEFD(guest_FPSR)
   1385                }
   1386         };
   1387 
   1388 
   1389 /*---------------------------------------------------------------*/
   1390 /*--- end                               guest_arm64_helpers.c ---*/
   1391 /*---------------------------------------------------------------*/
   1392