
/*---------------------------------------------------------------*/
/*--- begin                             guest_arm64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_arm64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/* This file contains helper functions for arm64 guest code.  Calls
   to these functions are generated by the back end.  These calls are
   of course in the host machine code and this file will be compiled
   to host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change a signature here, you'll also have to
   change the parameters passed to it in the IR calls constructed by
   guest_arm64_toIR.c.
*/
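
/* An illustration of the lazy flag-thunk scheme (a sketch, using a
   hypothetical guest instruction sequence): after the front end
   translates "SUBS X0, X1, X2", the thunk holds CC_OP =
   ARM64G_CC_OP_SUB64, CC_DEP1 = argL (the value of X1) and CC_DEP2 =
   argR (the value of X2).  A following "B.EQ", unless specialised
   away by guest_arm64_spechelper below, is evaluated by calling
   arm64g_calculate_condition((ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64,
   argL, argR, 0), which amounts to testing argL == argR. */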


/* Set to 1 to get detailed profiling info about individual N, Z, C
   and V flag evaluation. */
#define PROFILE_NZCV_FLAGS 0

#if PROFILE_NZCV_FLAGS

static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
static UInt initted = 0;
static UInt tot_evals = 0;

static void initCounts ( void )
{
   UInt i, j;
   for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
      for (j = 0; j < 16; j++) {
         tab_eval[i][j] = 0;
      }
   }
   initted = 1;
}

static void showCounts ( void )
{
   const HChar* nameCC[16]
      = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
          "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
   UInt i, j;
   ULong sum = 0;
   vex_printf("\nCC_OP          0         1         2         3    "
              "     4         5         6\n");
   vex_printf(  "--------------------------------------------------"
              "--------------------------\n");
   for (j = 0; j < 16; j++) {
      vex_printf("%2d %s  ", j, nameCC[j]);
      for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
         vex_printf("%9d ", tab_eval[i][j]);
         sum += tab_eval[i][j];
      }
      vex_printf("\n");
   }
   vex_printf("(In total %llu calls)\n", sum);
}

#define NOTE_EVAL(_cc_op, _cond) \
   do { \
      if (!initted) initCounts(); \
      vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
      vassert( ((UInt)(_cond)) < 16); \
      tab_eval[(UInt)(_cc_op)][(UInt)(_cond)]++;  \
      tot_evals++; \
      if (0 == (tot_evals & 0x7FFF)) \
        showCounts(); \
   } while (0)

#endif /* PROFILE_NZCV_FLAGS */


/* Calculate the N flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 63:1 are zero. */
static
ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
                                ULong cc_dep2, ULong cc_dep3 )
{
   switch (cc_op) {
      case ARM64G_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         ULong nf   = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
         return nf;
      }
      case ARM64G_CC_OP_ADD32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL + argR;
         ULong nf   = (ULong)(res >> 31);
         return nf;
      }
      case ARM64G_CC_OP_ADD64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL + argR;
         ULong nf   = (ULong)(res >> 63);
         return nf;
      }
      case ARM64G_CC_OP_SUB32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL - argR;
         ULong nf   = (ULong)(res >> 31);
         return nf;
      }
      case ARM64G_CC_OP_SUB64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL - argR;
         ULong nf   = res >> 63;
         return nf;
      }
      case ARM64G_CC_OP_ADC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL + argR + oldC;
         ULong nf   = (ULong)(res >> 31);
         return nf;
      }
      case ARM64G_CC_OP_ADC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL + argR + oldC;
         ULong nf   = res >> 63;
         return nf;
      }
      case ARM64G_CC_OP_SBC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL - argR - (oldC ^ 1);
         ULong nf   = (ULong)(res >> 31);
         return nf;
      }
      case ARM64G_CC_OP_SBC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL - argR - (oldC ^ 1);
         ULong nf   = res >> 63;
         return nf;
      }
      case ARM64G_CC_OP_LOGIC32: {
         /* (res, unused, unused) */
         UInt  res = (UInt)cc_dep1;
         ULong nf  = res >> 31;
         return nf;
      }
      case ARM64G_CC_OP_LOGIC64: {
         /* (res, unused, unused) */
         ULong res = cc_dep1;
         ULong nf  = res >> 63;
         return nf;
      }
//ZZ       case ARMG_CC_OP_MUL: {
//ZZ          /* (res, unused, oldC:oldV) */
//ZZ          UInt res  = cc_dep1;
//ZZ          UInt nf   = res >> 31;
//ZZ          return nf;
//ZZ       }
//ZZ       case ARMG_CC_OP_MULL: {
//ZZ          /* (resLo32, resHi32, oldC:oldV) */
//ZZ          UInt resHi32 = cc_dep2;
//ZZ          UInt nf      = resHi32 >> 31;
//ZZ          return nf;
//ZZ       }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("arm64g_calculate_flag_n"
                    "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("arm64g_calculate_flag_n");
   }
}


/* Calculate the Z flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 63:1 are zero. */
static
ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
                                ULong cc_dep2, ULong cc_dep3 )
{
   switch (cc_op) {
      case ARM64G_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         ULong zf   = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
         return zf;
      }
      case ARM64G_CC_OP_ADD32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL + argR;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_ADD64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL + argR;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_SUB32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL - argR;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_SUB64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL - argR;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_ADC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL + argR + oldC;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_ADC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL + argR + oldC;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_SBC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL - argR - (oldC ^ 1);
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_SBC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL - argR - (oldC ^ 1);
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_LOGIC32: {
         /* (res, unused, unused) */
         UInt  res  = (UInt)cc_dep1;
         ULong zf   = res == 0;
         return zf;
      }
      case ARM64G_CC_OP_LOGIC64: {
         /* (res, unused, unused) */
         ULong res  = cc_dep1;
         ULong zf   = res == 0;
         return zf;
      }
//ZZ       case ARMG_CC_OP_MUL: {
//ZZ          /* (res, unused, oldC:oldV) */
//ZZ          UInt res  = cc_dep1;
//ZZ          UInt zf   = res == 0;
//ZZ          return zf;
//ZZ       }
//ZZ       case ARMG_CC_OP_MULL: {
//ZZ          /* (resLo32, resHi32, oldC:oldV) */
//ZZ          UInt resLo32 = cc_dep1;
//ZZ          UInt resHi32 = cc_dep2;
//ZZ          UInt zf      = (resHi32|resLo32) == 0;
//ZZ          return zf;
//ZZ       }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("arm64g_calculate_flag_z"
                    "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("arm64g_calculate_flag_z");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the C flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 63:1 are zero. */
ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
                                ULong cc_dep2, ULong cc_dep3 )
{
   switch (cc_op) {
      case ARM64G_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
         return cf;
      }
      case ARM64G_CC_OP_ADD32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL + argR;
         ULong cf   = res < argL;
         return cf;
      }
      case ARM64G_CC_OP_ADD64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL + argR;
         ULong cf   = res < argL;
         return cf;
      }
      case ARM64G_CC_OP_SUB32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         ULong cf   = argL >= argR;
         return cf;
      }
      case ARM64G_CC_OP_SUB64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong cf   = argL >= argR;
         return cf;
      }
      case ARM64G_CC_OP_ADC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL + argR + oldC;
         ULong cf   = oldC ? (res <= argL) : (res < argL);
         return cf;
      }
      case ARM64G_CC_OP_ADC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL + argR + oldC;
         ULong cf   = oldC ? (res <= argL) : (res < argL);
         return cf;
      }
      case ARM64G_CC_OP_SBC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong cf   = oldC ? (argL >= argR) : (argL > argR);
         return cf;
      }
      case ARM64G_CC_OP_SBC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong cf   = oldC ? (argL >= argR) : (argL > argR);
         return cf;
      }
      case ARM64G_CC_OP_LOGIC32:
      case ARM64G_CC_OP_LOGIC64: {
         /* (res, unused, unused) */
         return 0; // C after logic is zero on arm64
      }
//ZZ       case ARMG_CC_OP_MUL: {
//ZZ          /* (res, unused, oldC:oldV) */
//ZZ          UInt oldC = (cc_dep3 >> 1) & 1;
//ZZ          vassert((cc_dep3 & ~3) == 0);
//ZZ          UInt cf   = oldC;
//ZZ          return cf;
//ZZ       }
//ZZ       case ARMG_CC_OP_MULL: {
//ZZ          /* (resLo32, resHi32, oldC:oldV) */
//ZZ          UInt oldC    = (cc_dep3 >> 1) & 1;
//ZZ          vassert((cc_dep3 & ~3) == 0);
//ZZ          UInt cf      = oldC;
//ZZ          return cf;
//ZZ       }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("arm64g_calculate_flag_c"
                    "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("arm64g_calculate_flag_c");
   }
}


/* Calculate the V flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 63:1 are zero.
   (This one is static -- it is reached only via the NZCV and
   condition helpers, not directly from generated code.) */
static
ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
                                ULong cc_dep2, ULong cc_dep3 )
{
   switch (cc_op) {
      case ARM64G_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         ULong vf   = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
         return vf;
      }
      case ARM64G_CC_OP_ADD32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL + argR;
         ULong vf   = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
         return vf;
      }
      case ARM64G_CC_OP_ADD64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL + argR;
         ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
         return vf;
      }
      case ARM64G_CC_OP_SUB32: {
         /* (argL, argR, unused) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  res  = argL - argR;
         ULong vf   = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
         return vf;
      }
      case ARM64G_CC_OP_SUB64: {
         /* (argL, argR, unused) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong res  = argL - argR;
         ULong vf   = ((argL ^ argR) & (argL ^ res)) >> 63;
         return vf;
      }
      case ARM64G_CC_OP_ADC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL + argR + oldC;
         ULong vf   = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
         return vf;
      }
      case ARM64G_CC_OP_ADC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL + argR + oldC;
         ULong vf   = ((res ^ argL) & (res ^ argR)) >> 63;
         return vf;
      }
      case ARM64G_CC_OP_SBC32: {
         /* (argL, argR, oldC) */
         UInt  argL = (UInt)cc_dep1;
         UInt  argR = (UInt)cc_dep2;
         UInt  oldC = (UInt)cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt  res  = argL - argR - (oldC ^ 1);
         ULong vf   = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
         return vf;
      }
      case ARM64G_CC_OP_SBC64: {
         /* (argL, argR, oldC) */
         ULong argL = cc_dep1;
         ULong argR = cc_dep2;
         ULong oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         ULong res  = argL - argR - (oldC ^ 1);
         ULong vf   = ((argL ^ argR) & (argL ^ res)) >> 63;
         return vf;
      }
      case ARM64G_CC_OP_LOGIC32:
      case ARM64G_CC_OP_LOGIC64: {
         /* (res, unused, unused) */
         return 0; // V after logic is zero on arm64
      }
//ZZ       case ARMG_CC_OP_MUL: {
//ZZ          /* (res, unused, oldC:oldV) */
//ZZ          UInt oldV = (cc_dep3 >> 0) & 1;
//ZZ          vassert((cc_dep3 & ~3) == 0);
//ZZ          UInt vf   = oldV;
//ZZ          return vf;
//ZZ       }
//ZZ       case ARMG_CC_OP_MULL: {
//ZZ          /* (resLo32, resHi32, oldC:oldV) */
//ZZ          UInt oldV    = (cc_dep3 >> 0) & 1;
//ZZ          vassert((cc_dep3 & ~3) == 0);
//ZZ          UInt vf      = oldV;
//ZZ          return vf;
//ZZ       }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("arm64g_calculate_flag_v"
                    "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("arm64g_calculate_flag_v");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate NZCV from the supplied thunk components, in the positions
   they appear in PSTATE (via the NZCV register), viz bits 31:28 for
   N Z C V respectively.  Returned bits 27:0 are zero. */
ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
                                    ULong cc_dep2, ULong cc_dep3 )
{
   ULong f;
   ULong res = 0;
   f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARM64G_CC_SHIFT_N);
   f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARM64G_CC_SHIFT_Z);
   f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARM64G_CC_SHIFT_C);
   f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARM64G_CC_SHIFT_V);
   return res;
}
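
/* (Note, as a quick consistency check: for a COPY thunk this reduces
   to the identity on bits 31:28, that is,
   arm64g_calculate_flags_nzcv(ARM64G_CC_OP_COPY, nzcv, 0, 0)
   == (nzcv & 0xF0000000).) */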

//ZZ
//ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
//ZZ /* Calculate the QC flag from the arguments, in the lowest bit
//ZZ    of the word (bit 0).  Urr, having this out of line is bizarre.
//ZZ    Push back inline. */
//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
//ZZ                               UInt resR1, UInt resR2 )
//ZZ {
//ZZ    if (resL1 != resR1 || resL2 != resR2)
//ZZ       return 1;
//ZZ    else
//ZZ       return 0;
//ZZ }

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the specified condition from the thunk components, in the
   lowest bit of the word (bit 0).  Returned bits 63:1 are zero. */
ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
                                   ULong cond_n_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2, ULong cc_dep3 )
{
   ULong cond  = cond_n_op >> 4;
   ULong cc_op = cond_n_op & 0xF;
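   /* ARM64 condition codes come in pairs (EQ/NE, CS/CC, MI/PL, ...)
      in which the odd-numbered member is the logical negation of the
      even-numbered one.  The scheme below therefore evaluates the
      even member and XORs the result with bit 0 of |cond| (|inv|) to
      obtain the requested sense. */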
   ULong inv   = cond & 1;
   ULong nf, zf, vf, cf;

#  if PROFILE_NZCV_FLAGS
   NOTE_EVAL(cc_op, cond);
#  endif

   //   vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
   //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);

   switch (cond) {
      case ARM64CondEQ:    // Z=1         => z
      case ARM64CondNE:    // Z=0
         zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ zf;

      case ARM64CondCS:    // C=1         => c
      case ARM64CondCC:    // C=0
         cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ cf;

      case ARM64CondMI:    // N=1         => n
      case ARM64CondPL:    // N=0
         nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ nf;

      case ARM64CondVS:    // V=1         => v
      case ARM64CondVC:    // V=0
         vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ vf;

      case ARM64CondHI:    // C=1 && Z=0   => c & ~z
      case ARM64CondLS:    // C=0 || Z=1
         cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & (cf & ~zf));

      case ARM64CondGE:    // N=V          => ~(n^v)
      case ARM64CondLT:    // N!=V
         nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(nf ^ vf));

      case ARM64CondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
      case ARM64CondLE:    // Z=1 || N!=V
         nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(zf | (nf ^ vf)));

      case ARM64CondAL:    // 1
      case ARM64CondNV:    // 1
         return 1;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("arm64g_calculate_condition(ARM64)"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("arm64g_calculate_condition(ARM64)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32b ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFULL) ^ acc;
   for (i = 0; i < 8; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32h ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFFFULL) ^ acc;
   for (i = 0; i < 16; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32w ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
   for (i = 0; i < 32; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32x ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = bits ^ acc;
   for (i = 0; i < 64; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32cb ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFULL) ^ acc;
   for (i = 0; i < 8; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32ch ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFFFULL) ^ acc;
   for (i = 0; i < 16; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32cw ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
   for (i = 0; i < 32; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong arm64g_calc_crc32cx ( ULong acc, ULong bits )
{
   UInt  i;
   ULong crc = bits ^ acc;
   for (i = 0; i < 64; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
   return crc;
}
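
/* (0xEDB88320 is the bit-reflected form of the CRC-32 polynomial
   0x04C11DB7, and 0x82F63B78 the reflected form of the CRC-32C
   (Castagnoli) polynomial 0x1EDC6F41.)

   A sanity-check sketch, not part of the build: chaining the
   byte-wise helpers over the standard test string "123456789", with
   the customary initial and final inversions, should reproduce the
   well-known check values 0xCBF43926 (CRC-32) and 0xE3069283
   (CRC-32C). */
#if 0
static void crc32_selftest ( void )
{
   const UChar* s = (const UChar*)"123456789";
   ULong acc  = 0xFFFFFFFFULL;   /* CRC-32 accumulator  */
   ULong accC = 0xFFFFFFFFULL;   /* CRC-32C accumulator */
   UInt  i;
   for (i = 0; i < 9; i++) {
      acc  = arm64g_calc_crc32b (acc,  s[i]);
      accC = arm64g_calc_crc32cb(accC, s[i]);
   }
   vassert((acc  ^ 0xFFFFFFFFULL) == 0xCBF43926ULL);
   vassert((accC ^ 0xFFFFFFFFULL) == 0xE3069283ULL);
}
#endif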


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-arm64 platforms, return 0. */
ULong arm64g_dirtyhelper_MRS_CNTVCT_EL0 ( void )
{
#  if defined(__aarch64__) && !defined(__arm__)
   ULong w = 0x5555555555555555ULL; /* overwritten */
   __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(w));
   return w;
#  else
   return 0ULL;
#  endif
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-arm64 platforms, return 0. */
ULong arm64g_dirtyhelper_MRS_CNTFRQ_EL0 ( void )
{
#  if defined(__aarch64__) && !defined(__arm__)
   ULong w = 0x5555555555555555ULL; /* overwritten */
   __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(w));
   return w;
#  else
   return 0ULL;
#  endif
}


void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res, ULong arg1, ULong arg2 )
{
   /* This doesn't need to be a dirty helper, except for the fact that
      a clean helper can't return a 128 bit value.  This is a pretty
      lame implementation of PMULLQ, but at least it doesn't contain any
      data dependent branches, and has lots of ILP.  I guess we could unroll
      the loop completely and offer extensive prayers to the gods of ILP
      if more performance is needed. */
   UInt i;
   ULong accHi = 0, accLo = 0;
   ULong op2Hi = 0, op2Lo = arg2;
   for (i = 0; i < 64; i++) {
      /* Make |mask| be all 0s or all 1s, a copy of arg1[i] */
      Long mask = arg1 << (63-i);
      mask >>= 63;
      accHi ^= (op2Hi & mask);
      accLo ^= (op2Lo & mask);
      /* do: op2Hi:op2Lo <<=u 1 */
      op2Hi <<= 1;
      op2Hi |= ((op2Lo >> 63) & 1);
      op2Lo <<= 1;
   }
   res->w64[1] = accHi;
   res->w64[0] = accLo;
}
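
/* Worked example in GF(2)[x], as a quick sanity check: (x + 1)
   squared is x^2 + 1, because the two middle x terms cancel under
   XOR.  Hence arm64g_dirtyhelper_PMULLQ(&r, 3, 3) leaves r.w64[1]
   == 0 and r.w64[0] == 5. */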


/*---------------------------------------------------------------*/
/*--- Crypto instruction helpers                              ---*/
/*---------------------------------------------------------------*/

/* DIRTY HELPERS for doing AES support:
   * AESE (SubBytes, then ShiftRows)
   * AESD (InvShiftRows, then InvSubBytes)
   * AESMC (MixColumns)
   * AESIMC (InvMixColumns)
   These don't actually have to be dirty helpers -- they could be
   clean, but for the fact that they return a V128 and a clean helper
   can't do that.

   The ARMv8 manual seems to imply that AESE first performs ShiftRows,
   then SubBytes.  This seems to contradict FIPS 197, so the
   implementation below is consistent with FIPS 197.  One can observe
   that the two transformations commute -- SubBytes substitutes each
   byte independently of its position, while ShiftRows only permutes
   byte positions -- so the order in which they happen makes no
   difference to the result.  Hence the ambiguity doesn't actually
   matter, but it is confusing.  The v8 manual looks correct about
   AESD, though.

   Only the three functions rj_xtime, aesMixColumn and aesInvMixColumn
   are taken from "A byte-oriented AES-256 implementation" and are
   subject to the following usage terms:

     Byte-oriented AES-256 implementation.
     All lookup tables replaced with 'on the fly' calculations.

     Copyright (c) 2007-2011 Ilya O. Levin, http://www.literatecode.com
     Other contributors: Hal Finney

     Permission to use, copy, modify, and distribute this software for any
     purpose with or without fee is hereby granted, provided that the above
     copyright notice and this permission notice appear in all copies.

     THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

const UChar aesMapSubBytes[256]
   = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
       0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
       0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
       0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
       0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
       0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
       0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
       0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
       0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
       0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
       0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
       0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
       0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
       0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
       0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
       0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
       0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
       0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
       0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
       0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
       0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
       0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
       0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
       0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
       0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
       0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
       0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
       0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
       0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
       0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
       0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
       0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
     };

const UChar aesMapInvSubBytes[256]
   = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
       0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
       0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
       0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
       0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
       0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
       0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
       0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
       0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
       0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
       0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
       0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
       0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
       0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
       0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
       0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
       0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
       0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
       0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
       0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
       0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
       0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
       0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
       0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
       0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
       0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
       0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
       0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
       0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
       0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
       0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
       0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
     };

static inline UChar rj_xtime ( UChar x )
{
   UChar y = (UChar)(x << 1);
   return (x & 0x80) ? (y ^ 0x1b) : y;
}
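
/* (rj_xtime is multiplication by x -- the byte {02} -- in AES's
   field GF(2^8) modulo x^8 + x^4 + x^3 + x + 1; the constant 0x1b is
   the low byte of that reduction polynomial.) */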

static void aesMixColumn ( /*MOD*/UChar* r )
{
   UChar a = r[0];
   UChar b = r[1];
   UChar c = r[2];
   UChar d = r[3];
   UChar e = a ^ b ^ c ^ d;
   r[0] ^= e ^ rj_xtime(a ^ b);
   r[1] ^= e ^ rj_xtime(b ^ c);
   r[2] ^= e ^ rj_xtime(c ^ d);
   r[3] ^= e ^ rj_xtime(d ^ a);
}

static void aesInvMixColumn ( /*MOD*/UChar* r )
{
   UChar a = r[0];
   UChar b = r[1];
   UChar c = r[2];
   UChar d = r[3];
   UChar e = a ^ b ^ c ^ d;
   UChar z = rj_xtime(e);
   UChar x = e ^ rj_xtime(rj_xtime(z ^ a ^ c));
   UChar y = e ^ rj_xtime(rj_xtime(z ^ b ^ d));
   r[0] ^= x ^ rj_xtime(a ^ b);
   r[1] ^= y ^ rj_xtime(b ^ c);
   r[2] ^= x ^ rj_xtime(c ^ d);
   r[3] ^= y ^ rj_xtime(d ^ a);
}


/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_AESE ( /*OUT*/V128* res, ULong argHi, ULong argLo )
{
   res->w64[1] = argHi;
   res->w64[0] = argLo;

   /* First do SubBytes on the State. */
   UInt i;
   for (i = 0; i < 16; i++) {
      res->w8[i] = aesMapSubBytes[res->w8[i] & 0xFF];
   }

   /* Then do ShiftRows on the State. */
#  define XX(_ix) res->w8[_ix]
   { UChar old1 = XX(1);
     XX(1) = XX(5); XX(5) = XX(9); XX(9) = XX(13); XX(13) = old1;
   }
   { UChar old2 = XX(2); UChar old6 = XX(6);
     XX(2) = XX(10); XX(6) = XX(14); XX(10) = old2; XX(14) = old6;
   }
   { UChar old15 = XX(15);
     XX(15) = XX(11); XX(11) = XX(7); XX(7) = XX(3); XX(3) = old15;
   }
#  undef XX
}


/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_AESD ( /*OUT*/V128* res, ULong argHi, ULong argLo )
{
   res->w64[1] = argHi;
   res->w64[0] = argLo;

   /* First do InvShiftRows on the State. */
#  define XX(_ix) res->w8[_ix]
   { UChar old13 = XX(13);
     XX(13) = XX(9); XX(9) = XX(5); XX(5) = XX(1); XX(1) = old13;
   }
   { UChar old14 = XX(14); UChar old10 = XX(10);
     XX(14) = XX(6); XX(10) = XX(2); XX(6) = old14; XX(2) = old10;
   }
   { UChar old3 = XX(3);
     XX(3) = XX(7); XX(7) = XX(11); XX(11) = XX(15); XX(15) = old3;
   }
#  undef XX

   /* Then do InvSubBytes on the State. */
   UInt i;
   for (i = 0; i < 16; i++) {
      res->w8[i] = aesMapInvSubBytes[res->w8[i] & 0xFF];
   }
}


/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_AESMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
{
   res->w64[1] = argHi;
   res->w64[0] = argLo;
   aesMixColumn(&res->w8[0]);
   aesMixColumn(&res->w8[4]);
   aesMixColumn(&res->w8[8]);
   aesMixColumn(&res->w8[12]);
}


/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_AESIMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
{
   res->w64[1] = argHi;
   res->w64[0] = argLo;
   aesInvMixColumn(&res->w8[0]);
   aesInvMixColumn(&res->w8[4]);
   aesInvMixColumn(&res->w8[8]);
   aesInvMixColumn(&res->w8[12]);
}


/* DIRTY HELPERS for SHA instruction support.  As with the AES helpers
   above, these are actually pure functions and are only dirty because
   clean helpers can't return a V128. */

static inline UInt ROL32 ( UInt x, UInt sh ) {
   vassert(sh > 0 && sh < 32);
   return (x << sh) | (x >> (32 - sh));
}

static inline UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh > 0 && sh < 32);
   return (x >> sh) | (x << (32 - sh));
}

static inline UInt SHAchoose ( UInt x, UInt y, UInt z ) {
   return ((y ^ z) & x) ^ z;
}

static inline UInt SHAmajority ( UInt x, UInt y, UInt z ) {
   return (x & y) | ((x | y) & z);
}

static inline UInt SHAparity ( UInt x, UInt y, UInt z ) {
   return x ^ y ^ z;
}

static inline UInt SHAhashSIGMA0 ( UInt x ) {
   return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
}

static inline UInt SHAhashSIGMA1 ( UInt x ) {
   return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
}

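/* (The five functions above are the FIPS 180-4 functions Ch, Maj,
   Parity, Sigma0 and Sigma1, under the names used in the ARMv8
   pseudocode.)

   SHA256hash below performs four rounds of the SHA-256 compression
   step, following the ARMv8 SHA256hash pseudocode: X and Y each carry
   four of the eight working variables, and W supplies the four
   per-round inputs (message schedule words with the round constants
   already folded in, as supplied by the caller). */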
static void SHA256hash ( /*MOD*/V128* X, /*MOD*/V128* Y, const V128* W )
{
   UInt e;
   for (e = 0; e <= 3; e++) {
      UInt chs = SHAchoose(Y->w32[0], Y->w32[1], Y->w32[2]);
      UInt maj = SHAmajority(X->w32[0], X->w32[1], X->w32[2]);
      UInt t   = Y->w32[3] + SHAhashSIGMA1(Y->w32[0]) + chs + W->w32[e];
      X->w32[3] = t + X->w32[3];
      Y->w32[3] = t + SHAhashSIGMA0(X->w32[0]) + maj;
      UInt ts = Y->w32[3];
      Y->w32[3] = Y->w32[2];
      Y->w32[2] = Y->w32[1];
      Y->w32[1] = Y->w32[0];
      Y->w32[0] = X->w32[3];
      X->w32[3] = X->w32[2];
      X->w32[2] = X->w32[1];
      X->w32[1] = X->w32[0];
      X->w32[0] = ts;
   }
}
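
/* The SHA1C, SHA1M and SHA1P helpers below are identical apart from
   the round function used: SHAchoose, SHAmajority and SHAparity --
   the FIPS 180-4 f-functions for rounds 0..19, 40..59 and
   20..39/60..79 respectively. */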

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   vassert(nHi == 0);
   vassert((nLo >> 32) == 0);
   V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
   UInt Y; Y = (UInt)nLo;
   V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
   UInt e;
   for (e = 0; e <= 3; e++) {
      UInt t = SHAchoose(X.w32[1], X.w32[2], X.w32[3]);
      Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
      X.w32[1] = ROL32(X.w32[1], 30);
      UInt oldY = Y;
      Y = X.w32[3];
      X.w32[3] = X.w32[2];
      X.w32[2] = X.w32[1];
      X.w32[1] = X.w32[0];
      X.w32[0] = oldY;
   }
   res->w64[1] = X.w64[1];
   res->w64[0] = X.w64[0];
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res, ULong nHi, ULong nLo )
{
   vassert(nHi == 0);
   vassert((nLo >> 32) == 0);
   res->w32[3] = res->w32[2] = res->w32[1] = 0;
   res->w32[0] = ROL32((UInt)nLo, 30);
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   vassert(nHi == 0);
   vassert((nLo >> 32) == 0);
   V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
   UInt Y; Y = (UInt)nLo;
   V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
   UInt e;
   for (e = 0; e <= 3; e++) {
      UInt t = SHAmajority(X.w32[1], X.w32[2], X.w32[3]);
      Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
      X.w32[1] = ROL32(X.w32[1], 30);
      UInt oldY = Y;
      Y = X.w32[3];
      X.w32[3] = X.w32[2];
      X.w32[2] = X.w32[1];
      X.w32[1] = X.w32[0];
      X.w32[0] = oldY;
   }
   res->w64[1] = X.w64[1];
   res->w64[0] = X.w64[0];
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   vassert(nHi == 0);
   vassert((nLo >> 32) == 0);
   V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
   UInt Y; Y = (UInt)nLo;
   V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
   UInt e;
   for (e = 0; e <= 3; e++) {
      UInt t = SHAparity(X.w32[1], X.w32[2], X.w32[3]);
      Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
      X.w32[1] = ROL32(X.w32[1], 30);
      UInt oldY = Y;
      Y = X.w32[3];
      X.w32[3] = X.w32[2];
      X.w32[2] = X.w32[1];
      X.w32[1] = X.w32[0];
      X.w32[0] = oldY;
   }
   res->w64[1] = X.w64[1];
   res->w64[0] = X.w64[0];
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                  ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   res->w64[1] = nLo;
   res->w64[0] = dHi;
   res->w64[1] ^= dHi ^ mHi;
   res->w64[0] ^= dLo ^ mLo;
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                  ULong nHi, ULong nLo )
{
   /* This computes "T = Vd ^ (Vn >>u 32)" */
   V128 T; T.w64[1] = nHi; T.w64[0] = nLo;
   T.w32[0] = T.w32[1];
   T.w32[1] = T.w32[2];
   T.w32[2] = T.w32[3];
   T.w32[3] = 0;
   T.w64[1] ^= dHi;
   T.w64[0] ^= dLo;
   /* Then rotate each lane left by 1, with a correction for lane 3. */
   res->w32[0] = ROL32(T.w32[0], 1);
   res->w32[1] = ROL32(T.w32[1], 1);
   res->w32[2] = ROL32(T.w32[2], 1);
   res->w32[3] = ROL32(T.w32[3], 1) ^ ROL32(T.w32[0], 2);
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                   ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
   V128 Y; Y.w64[1] = dHi; Y.w64[0] = dLo;
   V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
   SHA256hash(&X, &Y, &W);
   res->w64[1] = Y.w64[1];
   res->w64[0] = Y.w64[0];
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                  ULong nHi, ULong nLo, ULong mHi, ULong mLo )
{
   V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
   V128 Y; Y.w64[1] = nHi; Y.w64[0] = nLo;
   V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
   SHA256hash(&X, &Y, &W);
   res->w64[1] = X.w64[1];
   res->w64[0] = X.w64[0];
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                    ULong nHi, ULong nLo )
{
   res->w64[1] = res->w64[0] = 0;
   V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
   V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
   V128 T;
   T.w32[3] = op2.w32[0];
   T.w32[2] = op1.w32[3];
   T.w32[1] = op1.w32[2];
   T.w32[0] = op1.w32[1];
   UInt e;
   for (e = 0; e <= 3; e++) {
      UInt elt = T.w32[e];
      elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ (elt >> 3);
      res->w32[e] = elt + op1.w32[e];
   }
}

/* CALLED FROM GENERATED CODE */
void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
                                    ULong nHi, ULong nLo,
                                    ULong mHi, ULong mLo )
{
   res->w64[0] = res->w64[1] = 0;
   V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
   V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
   V128 op3; op3.w64[1] = mHi; op3.w64[0] = mLo;
   V128 T0;
   T0.w32[3] = op3.w32[0];
   T0.w32[2] = op2.w32[3];
   T0.w32[1] = op2.w32[2];
   T0.w32[0] = op2.w32[1];
   UInt T1[2];
   UInt e;
   T1[1] = op3.w32[3];
   T1[0] = op3.w32[2];
   for (e = 0; e <= 1; e++) {
      UInt elt = T1[e];
      elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
      elt = elt + op1.w32[e] + T0.w32[e];
      res->w32[e] = elt;
   }
   T1[1] = res->w32[1];
   T1[0] = res->w32[0];
   for (e = 2; e <= 3; e++) {
      UInt elt = T1[e-2];
      elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
      elt = elt + op1.w32[e] + T0.w32[e];
      res->w32[e] = elt;
   }
}


/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers.    ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- flags functions.                                        ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU64 ( IRExpr* e, ULong n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == n );
}
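
/* For example: the thunk evaluation generated for "CMP Xm, Xn ;
   B.EQ ..." reaches here as a call to arm64g_calculate_condition
   with cond_n_op == (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64, and is
   rewritten into the direct comparison 1Uto64(CmpEQ64(cc_dep1,
   cc_dep2)) -- see the first case below. */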
   1311 
   1312 IRExpr* guest_arm64_spechelper ( const HChar* function_name,
   1313                                  IRExpr** args,
   1314                                  IRStmt** precedingStmts,
   1315                                  Int      n_precedingStmts )
   1316 {
   1317 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
   1318 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
   1319 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
   1320 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
   1321 
   1322    Int i, arity = 0;
   1323    for (i = 0; args[i]; i++)
   1324       arity++;
   1325 //ZZ #  if 0
   1326 //ZZ    vex_printf("spec request:\n");
   1327 //ZZ    vex_printf("   %s  ", function_name);
   1328 //ZZ    for (i = 0; i < arity; i++) {
   1329 //ZZ       vex_printf("  ");
   1330 //ZZ       ppIRExpr(args[i]);
   1331 //ZZ    }
   1332 //ZZ    vex_printf("\n");
   1333 //ZZ #  endif
   1334 
   1335    /* --------- specialising "arm64g_calculate_condition" --------- */
   1336 
   1337    if (vex_streq(function_name, "arm64g_calculate_condition")) {
   1338 
   1339       /* specialise calls to the "arm64g_calculate_condition" function.
   1340          Not sure whether this is strictly necessary, but: the
   1341          replacement IR must produce only the values 0 or 1.  Bits
   1342          63:1 are required to be zero. */
   1343       IRExpr *cond_n_op, *cc_dep1, *cc_dep2  ; //, *cc_ndep;
   1344       vassert(arity == 4);
   1345       cond_n_op = args[0]; /* (ARM64Condcode << 4)  |  ARM64G_CC_OP_* */
   1346       cc_dep1   = args[1];
   1347       cc_dep2   = args[2];
   1348       //cc_ndep   = args[3];
   1349 
      /*---------------- SUB64 ----------------*/

      /* 0, 1 */
      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
         /* EQ after SUB --> test argL == argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
      }
      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
         /* NE after SUB --> test argL != argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, cc_dep2));
      }

      /* 2, 3 */
      if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
         /* CS after SUB --> test argL >=u argR
                         --> test argR <=u argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }
      if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
         /* CC after SUB --> test argL <u argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }

      /* 8, 9 */
      if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
         /* LS after SUB --> test argL <=u argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
      if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
         /* HI after SUB --> test argL >u argR
                         --> test argR <u argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
      }

      /* 10, 11 */
      if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
         /* LT after SUB --> test argL <s argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }
      if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
         /* GE after SUB --> test argL >=s argR
                         --> test argR <=s argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
      }

      /* 12, 13 */
      if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
         /* GT after SUB --> test argL >s argR
                         --> test argR <s argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
      }
      if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
         /* LE after SUB --> test argL <=s argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
      }

      /*---------------- SUB32 ----------------*/

      /* 0, 1 */
      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
         /* EQ after SUB --> test argL == argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
                                        unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
         /* NE after SUB --> test argL != argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
                                        unop(Iop_64to32, cc_dep2)));
      }

      /* 2, 3 */
      if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
         /* CS after SUB --> test argL >=u argR
                         --> test argR <=u argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
                                         unop(Iop_64to32, cc_dep1)));
      }
      if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
         /* CC after SUB --> test argL <u argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
                                         unop(Iop_64to32, cc_dep2)));
      }

      /* 8, 9 */
      if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
         /* LS after SUB --> test argL <=u argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
                                         unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
         /* HI after SUB --> test argL >u argR
                         --> test argR <u argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
                                         unop(Iop_64to32, cc_dep1)));
      }

      /* 10, 11 */
      if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
         /* LT after SUB --> test argL <s argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
                                         unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
         /* GE after SUB --> test argL >=s argR
                         --> test argR <=s argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
                                         unop(Iop_64to32, cc_dep1)));
      }

      /* 12, 13 */
      if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
         /* GT after SUB --> test argL >s argR
                         --> test argR <s argL */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
                                         unop(Iop_64to32, cc_dep1)));
      }
      if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
         /* LE after SUB --> test argL <=s argR */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
                                         unop(Iop_64to32, cc_dep2)));
      }

//ZZ       /*---------------- SBB ----------------*/
//ZZ
//ZZ       if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
//ZZ          /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
//ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
//ZZ          /* HS after SBB (same as C after SBB below)
//ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
//ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
//ZZ          */
//ZZ          return
//ZZ             IRExpr_ITE(
//ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
//ZZ                /* case oldC != 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
//ZZ                /* case oldC == 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
//ZZ             );
//ZZ       }
//ZZ
//ZZ       /*---------------- LOGIC ----------------*/
//ZZ
//ZZ       if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
//ZZ          /* EQ after LOGIC --> test res == 0 */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
//ZZ       }
//ZZ       if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
//ZZ          /* NE after LOGIC --> test res != 0 */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
//ZZ       }
//ZZ
//ZZ       if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
//ZZ          /* PL after LOGIC --> test (res >> 31) == 0 */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpEQ32,
//ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
//ZZ                            mkU32(0)));
//ZZ       }
//ZZ       if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
//ZZ          /* MI after LOGIC --> test (res >> 31) == 1 */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpEQ32,
//ZZ                            binop(Iop_Shr32, cc_dep1, mkU8(31)),
//ZZ                            mkU32(1)));
//ZZ       }

      /*---------------- COPY ----------------*/

      if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
         /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1,
                                       mkU8(ARM64G_CC_SHIFT_Z)),
                      mkU64(1));
      }
      if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
         /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
         return binop(Iop_And64,
                      binop(Iop_Xor64,
                            binop(Iop_Shr64, cc_dep1,
                                             mkU8(ARM64G_CC_SHIFT_Z)),
                            mkU64(1)),
                      mkU64(1));
      }
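
      /* Worked example for the two COPY cases above: with
         ARM64G_CC_OP_COPY, DEP1 holds the NZCV bits already at their
         PSTATE positions (N=31, Z=30, C=29, V=28; see the vassert in
         LibVEX_GuestARM64_get_nzcv below).  So for
         cc_dep1 = 0x40000000 (Z set, N/C/V clear):

            EQ:  (0x40000000 >> ARM64G_CC_SHIFT_Z) & 1       == 1
            NE: ((0x40000000 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1  == 0

         as expected for a "result was zero" comparison. */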

//ZZ       /*----------------- AL -----------------*/
//ZZ
//ZZ       /* A critically important case for Thumb code.
//ZZ
//ZZ          What we're trying to spot is the case where cond_n_op is an
//ZZ          expression of the form Or32(..., 0xE0) since that means the
//ZZ          caller is asking for CondAL and we can simply return 1
//ZZ          without caring what the ... part is.  This is a potentially
//ZZ          dodgy kludge in that it assumes that the ... part has zeroes
//ZZ          in bits 7:4, so that the result of the Or32 is guaranteed to
//ZZ          be 0xE in bits 7:4.  Given that the places where this first
//ZZ          arg are constructed (in guest_arm_toIR.c) are very
//ZZ          constrained, we can get away with this.  To make this
//ZZ          guaranteed safe would require to have a new primop, Slice44
//ZZ          or some such, thusly
//ZZ
//ZZ          Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
//ZZ
//ZZ          and we would then look for Slice44(0xE0, ...)
//ZZ          which would give the required safety property.
//ZZ
//ZZ          It would be infeasibly expensive to scan backwards through
//ZZ          the entire block looking for an assignment to the temp, so
//ZZ          just look at the previous 16 statements.  That should find it
//ZZ          if it is an interesting case, as a result of how the
//ZZ          boilerplate guff at the start of each Thumb insn translation
//ZZ          is made.
//ZZ       */
//ZZ       if (cond_n_op->tag == Iex_RdTmp) {
//ZZ          Int    j;
//ZZ          IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
//ZZ          Int    limit    = n_precedingStmts - 16;
//ZZ          if (limit < 0) limit = 0;
//ZZ          if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
//ZZ          for (j = n_precedingStmts - 1; j >= limit; j--) {
//ZZ             IRStmt* st = precedingStmts[j];
//ZZ             if (st->tag == Ist_WrTmp
//ZZ                 && st->Ist.WrTmp.tmp == look_for
//ZZ                 && st->Ist.WrTmp.data->tag == Iex_Binop
//ZZ                 && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
//ZZ                 && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
//ZZ                return mkU32(1);
//ZZ          }
//ZZ          /* Didn't find any useful binding to the first arg
//ZZ             in the previous 16 stmts. */
//ZZ       }
   }

//ZZ    /* --------- specialising "armg_calculate_flag_c" --------- */
//ZZ
//ZZ    else
//ZZ    if (vex_streq(function_name, "armg_calculate_flag_c")) {
//ZZ
//ZZ       /* specialise calls to the "armg_calculate_flag_c" function.
//ZZ          Note that the returned value must be either 0 or 1; nonzero
//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
//ZZ          values (from the thunk) are assumed to have bits 31:1
//ZZ          clear. */
//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
//ZZ       vassert(arity == 4);
//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
//ZZ       cc_dep1 = args[1];
//ZZ       cc_dep2 = args[2];
//ZZ       cc_ndep = args[3];
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
//ZZ          /* Thunk args are (result, shco, oldV) */
//ZZ          /* C after LOGIC --> shco */
//ZZ          return cc_dep2;
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
//ZZ          /* Thunk args are (argL, argR, unused) */
//ZZ          /* C after SUB --> argL >=u argR
//ZZ                         --> argR <=u argL */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
//ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
//ZZ          /* C after SBB (same as HS after SBB above)
//ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
//ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
//ZZ          */
//ZZ          return
//ZZ             IRExpr_ITE(
//ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
//ZZ                /* case oldC != 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
//ZZ                /* case oldC == 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
//ZZ             );
//ZZ       }
//ZZ
//ZZ    }
//ZZ
//ZZ    /* --------- specialising "armg_calculate_flag_v" --------- */
//ZZ
//ZZ    else
//ZZ    if (vex_streq(function_name, "armg_calculate_flag_v")) {
//ZZ
//ZZ       /* specialise calls to the "armg_calculate_flag_v" function.
//ZZ          Note that the returned value must be either 0 or 1; nonzero
//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
//ZZ          values (from the thunk) are assumed to have bits 31:1
//ZZ          clear. */
//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
//ZZ       vassert(arity == 4);
//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
//ZZ       cc_dep1 = args[1];
//ZZ       cc_dep2 = args[2];
//ZZ       cc_ndep = args[3];
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
//ZZ          /* Thunk args are (result, shco, oldV) */
//ZZ          /* V after LOGIC --> oldV */
//ZZ          return cc_ndep;
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
//ZZ          /* Thunk args are (argL, argR, unused) */
//ZZ          /* V after SUB
//ZZ             --> let res = argL - argR
//ZZ                 in ((argL ^ argR) & (argL ^ res)) >> 31
//ZZ             --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
//ZZ          */
//ZZ          IRExpr* argL = cc_dep1;
//ZZ          IRExpr* argR = cc_dep2;
//ZZ          return
//ZZ             binop(Iop_Shr32,
//ZZ                   binop(Iop_And32,
//ZZ                         binop(Iop_Xor32, argL, argR),
//ZZ                         binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
//ZZ                   ),
//ZZ                   mkU8(31)
//ZZ             );
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
//ZZ          /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
//ZZ          /* V after SBB
//ZZ             --> let res = argL - argR - (oldC ^ 1)
//ZZ                 in  (argL ^ argR) & (argL ^ res) & 1
//ZZ          */
//ZZ          return
//ZZ             binop(
//ZZ                Iop_And32,
//ZZ                binop(
//ZZ                   Iop_And32,
//ZZ                   // argL ^ argR
//ZZ                   binop(Iop_Xor32, cc_dep1, cc_dep2),
//ZZ                   // argL ^ (argL - argR - (oldC ^ 1))
//ZZ                   binop(Iop_Xor32,
//ZZ                         cc_dep1,
//ZZ                         binop(Iop_Sub32,
//ZZ                               binop(Iop_Sub32, cc_dep1, cc_dep2),
//ZZ                               binop(Iop_Xor32, cc_ndep, mkU32(1)))
//ZZ                   )
//ZZ                ),
//ZZ                mkU32(1)
//ZZ             );
//ZZ       }
//ZZ
//ZZ    }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU8

   return NULL;
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

//ZZ /* VISIBLE TO LIBVEX CLIENT */
//ZZ #if 0
//ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
//ZZ                                  /*OUT*/VexGuestARMState* vex_state )
//ZZ {
//ZZ    vassert(0); // FIXME
//ZZ
//ZZ    /* Mask out everything except N Z V C. */
//ZZ    flags_native
//ZZ       &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
//ZZ
//ZZ    vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
//ZZ    vex_state->guest_CC_DEP1 = flags_native;
//ZZ    vex_state->guest_CC_DEP2 = 0;
//ZZ    vex_state->guest_CC_NDEP = 0;
//ZZ }
//ZZ #endif

/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
{
   ULong nzcv = 0;
   // NZCV
   nzcv |= arm64g_calculate_flags_nzcv(
               vex_state->guest_CC_OP,
               vex_state->guest_CC_DEP1,
               vex_state->guest_CC_DEP2,
               vex_state->guest_CC_NDEP
            );
   vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
//ZZ    // Q
//ZZ    if (vex_state->guest_QFLAG32 > 0)
//ZZ       cpsr |= (1 << 27);
//ZZ    // GE
//ZZ    if (vex_state->guest_GEFLAG0 > 0)
//ZZ       cpsr |= (1 << 16);
//ZZ    if (vex_state->guest_GEFLAG1 > 0)
//ZZ       cpsr |= (1 << 17);
//ZZ    if (vex_state->guest_GEFLAG2 > 0)
//ZZ       cpsr |= (1 << 18);
//ZZ    if (vex_state->guest_GEFLAG3 > 0)
//ZZ       cpsr |= (1 << 19);
//ZZ    // M
//ZZ    cpsr |= (1 << 4); // 0b10000 means user-mode
//ZZ    // J,T   J (bit 24) is zero by initialisation above
//ZZ    // T  we copy from R15T[0]
//ZZ    if (vex_state->guest_R15T & 1)
//ZZ       cpsr |= (1 << 5);
//ZZ    // ITSTATE we punt on for the time being.  Could compute it
//ZZ    // if needed though.
//ZZ    // E, endianness, 0 (littleendian) from initialisation above
//ZZ    // A,I,F disable some async exceptions.  Not sure about these.
//ZZ    // Leave as zero for the time being.
   return nzcv;
}

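/* A minimal client-side sketch of how the getter above might be used
   (illustrative only, not code from this file; the flag positions
   assumed here are the ones checked by the vassert above:
   N=31, Z=30, C=29, V=28):

      VexGuestARM64State gst;
      LibVEX_GuestARM64_initialise(&gst);
      ... run some translated guest code that updates the thunk ...
      ULong nzcv = LibVEX_GuestARM64_get_nzcv(&gst);
      Bool  n    = (nzcv >> 31) & 1;   // N flag
      Bool  z    = (nzcv >> 30) & 1;   // Z flag
      Bool  c    = (nzcv >> 29) & 1;   // C flag
      Bool  v    = (nzcv >> 28) & 1;   // V flag
*/
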
/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestARM64_get_fpsr ( const VexGuestARM64State* vex_state )
{
   UInt w32 = vex_state->guest_QCFLAG[0] | vex_state->guest_QCFLAG[1]
              | vex_state->guest_QCFLAG[2] | vex_state->guest_QCFLAG[3];
   ULong fpsr = 0;
   // QC
   if (w32 != 0)
      fpsr |= (1 << 27);
   return fpsr;
}

/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM64_set_fpsr ( /*MOD*/VexGuestARM64State* vex_state,
                                  ULong fpsr )
{
   // QC
   vex_state->guest_QCFLAG[0] = (UInt)((fpsr >> 27) & 1);
   vex_state->guest_QCFLAG[1] = 0;
   vex_state->guest_QCFLAG[2] = 0;
   vex_state->guest_QCFLAG[3] = 0;
}

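/* Round-trip behaviour of the two FPSR accessors above, for clarity
   (only the cumulative saturation bit QC, FPSR bit 27, is modelled):

      LibVEX_GuestARM64_set_fpsr(&gst, 1ULL << 27);
                                          // guest_QCFLAG = {1,0,0,0}
      LibVEX_GuestARM64_get_fpsr(&gst);   // == (1ULL << 27)
      LibVEX_GuestARM64_set_fpsr(&gst, 0);
                                          // guest_QCFLAG = {0,0,0,0}
      LibVEX_GuestARM64_get_fpsr(&gst);   // == 0

   Any nonzero value in any of the four QCFLAG lanes makes the getter
   report QC=1; the setter squashes the sticky state into lane 0. */
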
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
{
   vex_bzero(vex_state, sizeof(*vex_state));
//ZZ    vex_state->host_EvC_FAILADDR = 0;
//ZZ    vex_state->host_EvC_COUNTER = 0;
//ZZ
//ZZ    vex_state->guest_R0  = 0;
//ZZ    vex_state->guest_R1  = 0;
//ZZ    vex_state->guest_R2  = 0;
//ZZ    vex_state->guest_R3  = 0;
//ZZ    vex_state->guest_R4  = 0;
//ZZ    vex_state->guest_R5  = 0;
//ZZ    vex_state->guest_R6  = 0;
//ZZ    vex_state->guest_R7  = 0;
//ZZ    vex_state->guest_R8  = 0;
//ZZ    vex_state->guest_R9  = 0;
//ZZ    vex_state->guest_R10 = 0;
//ZZ    vex_state->guest_R11 = 0;
//ZZ    vex_state->guest_R12 = 0;
//ZZ    vex_state->guest_R13 = 0;
//ZZ    vex_state->guest_R14 = 0;
//ZZ    vex_state->guest_R15T = 0;  /* NB: implies ARM mode */
//ZZ
   vex_state->guest_CC_OP   = ARM64G_CC_OP_COPY;
//ZZ    vex_state->guest_CC_DEP1 = 0;
//ZZ    vex_state->guest_CC_DEP2 = 0;
//ZZ    vex_state->guest_CC_NDEP = 0;
//ZZ    vex_state->guest_QFLAG32 = 0;
//ZZ    vex_state->guest_GEFLAG0 = 0;
//ZZ    vex_state->guest_GEFLAG1 = 0;
//ZZ    vex_state->guest_GEFLAG2 = 0;
//ZZ    vex_state->guest_GEFLAG3 = 0;
//ZZ
//ZZ    vex_state->guest_EMNOTE  = EmNote_NONE;
//ZZ    vex_state->guest_CMSTART = 0;
//ZZ    vex_state->guest_CMLEN   = 0;
//ZZ    vex_state->guest_NRADDR  = 0;
//ZZ    vex_state->guest_IP_AT_SYSCALL = 0;
//ZZ
//ZZ    vex_state->guest_D0  = 0;
//ZZ    vex_state->guest_D1  = 0;
//ZZ    vex_state->guest_D2  = 0;
//ZZ    vex_state->guest_D3  = 0;
//ZZ    vex_state->guest_D4  = 0;
//ZZ    vex_state->guest_D5  = 0;
//ZZ    vex_state->guest_D6  = 0;
//ZZ    vex_state->guest_D7  = 0;
//ZZ    vex_state->guest_D8  = 0;
//ZZ    vex_state->guest_D9  = 0;
//ZZ    vex_state->guest_D10 = 0;
//ZZ    vex_state->guest_D11 = 0;
//ZZ    vex_state->guest_D12 = 0;
//ZZ    vex_state->guest_D13 = 0;
//ZZ    vex_state->guest_D14 = 0;
//ZZ    vex_state->guest_D15 = 0;
//ZZ    vex_state->guest_D16 = 0;
//ZZ    vex_state->guest_D17 = 0;
//ZZ    vex_state->guest_D18 = 0;
//ZZ    vex_state->guest_D19 = 0;
//ZZ    vex_state->guest_D20 = 0;
//ZZ    vex_state->guest_D21 = 0;
//ZZ    vex_state->guest_D22 = 0;
//ZZ    vex_state->guest_D23 = 0;
//ZZ    vex_state->guest_D24 = 0;
//ZZ    vex_state->guest_D25 = 0;
//ZZ    vex_state->guest_D26 = 0;
//ZZ    vex_state->guest_D27 = 0;
//ZZ    vex_state->guest_D28 = 0;
//ZZ    vex_state->guest_D29 = 0;
//ZZ    vex_state->guest_D30 = 0;
//ZZ    vex_state->guest_D31 = 0;
//ZZ
//ZZ    /* ARM encoded; zero is the default as it happens (result flags
//ZZ       (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
//ZZ       all exns masked, all exn sticky bits cleared). */
//ZZ    vex_state->guest_FPSCR = 0;
//ZZ
//ZZ    vex_state->guest_TPIDRURO = 0;
//ZZ
//ZZ    /* Not in a Thumb IT block. */
//ZZ    vex_state->guest_ITSTATE = 0;
//ZZ
//ZZ    vex_state->padding1 = 0;
//ZZ    vex_state->padding2 = 0;
//ZZ    vex_state->padding3 = 0;
//ZZ    vex_state->padding4 = 0;
//ZZ    vex_state->padding5 = 0;
}


/*-----------------------------------------------------------*/
/*--- Describing the ARM64 guest state, for the benefit   ---*/
/*--- of iropt and instrumenters.                         ---*/
/*-----------------------------------------------------------*/

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   We enforce precise exns for guest XSP, PC, X29(FP) and X30(LR).
   That might be overkill (for X29 and X30); I don't know.
*/
Bool guest_arm64_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
   Int xsp_max = xsp_min + 8 - 1;
   Int pc_min  = offsetof(VexGuestARM64State, guest_PC);
   Int pc_max  = pc_min + 8 - 1;

   if (maxoff < xsp_min || minoff > xsp_max) {
      /* no overlap with xsp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check the stack pointer.
   } else {
      return True;
   }

   if (maxoff < pc_min || minoff > pc_max) {
      /* no overlap with pc */
   } else {
      return True;
   }

   /* Guessing that we need PX for FP, but I don't really know. */
   Int x29_min = offsetof(VexGuestARM64State, guest_X29);
   Int x29_max = x29_min + 8 - 1;

   if (maxoff < x29_min || minoff > x29_max) {
      /* no overlap with x29 */
   } else {
      return True;
   }

   /* Guessing that we need PX for LR, but I don't really know. */
   Int x30_min = offsetof(VexGuestARM64State, guest_X30);
   Int x30_max = x30_min + 8 - 1;

   if (maxoff < x30_min || minoff > x30_max) {
      /* no overlap with x30 */
   } else {
      return True;
   }

   return False;
}

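/* The interval test used repeatedly above, written out as a
   stand-alone predicate for clarity (a sketch only; the function
   above inlines it at each use):

      static inline Bool overlaps ( Int minoff, Int maxoff,
                                    Int fld_min, Int fld_max )
      {
         return toBool( !(maxoff < fld_min || minoff > fld_max) );
      }

   For example, a state write described by minoff == xsp_min + 4 and
   maxoff == xsp_min + 7 overlaps the 8-byte XSP field
   [xsp_min, xsp_min + 7] and so forces precise exceptions. */
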

#define ALWAYSDEFD(field)                             \
    { offsetof(VexGuestARM64State, field),            \
      (sizeof ((VexGuestARM64State*)0)->field) }
VexGuestLayout
   arm64Guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestARM64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestARM64State,guest_XSP),
          .sizeof_SP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestARM64State,guest_PC),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 9,

          /* flags thunk: OP is always defd, whereas DEP1 and DEP2
             have to be tracked.  See detailed comment in gdefs.h on
             meaning of thunk fields. */
          .alwaysDefd
             = { /* 0 */ ALWAYSDEFD(guest_PC),
                 /* 1 */ ALWAYSDEFD(guest_CC_OP),
                 /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
                 /* 3 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 4 */ ALWAYSDEFD(guest_CMSTART),
                 /* 5 */ ALWAYSDEFD(guest_CMLEN),
                 /* 6 */ ALWAYSDEFD(guest_NRADDR),
                 /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
                 /* 8 */ ALWAYSDEFD(guest_TPIDR_EL0)
               }
        };

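/* For reference, ALWAYSDEFD(guest_CC_OP) expands to the pair

      { offsetof(VexGuestARM64State, guest_CC_OP),
        (sizeof ((VexGuestARM64State*)0)->guest_CC_OP) }

   i.e. the byte offset and byte size of the field.  That offset/size
   description is all Memcheck needs in order to treat the named
   slice of the guest state as always defined. */
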

/*---------------------------------------------------------------*/
/*--- end                               guest_arm64_helpers.c ---*/
/*---------------------------------------------------------------*/