Home | History | Annotate | Download | only in target-i386
      1 /*
      2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
      3  *
      4  *  Copyright (c) 2003 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include <math.h>
     21 #include "cpu.h"
     22 #include "helper.h"
     23 #include "qemu/aes.h"
     24 #include "qemu/host-utils.h"
     25 
     26 #if !defined(CONFIG_USER_ONLY)
     27 #include "exec/softmmu_exec.h"
     28 #endif /* !defined(CONFIG_USER_ONLY) */
     29 
     30 #define RC_MASK         0xc00
     31 #define RC_NEAR         0x000
     32 #define RC_DOWN         0x400
     33 #define RC_UP           0x800
     34 #define RC_CHOP         0xc00
     35 
     36 #define MAXTAN 9223372036854775808.0
     37 
     38 /* the following deal with x86 long double-precision numbers */
     39 #define MAXEXPD 0x7fff
     40 #define EXPBIAS 16383
     41 #define EXPD(fp)        (fp.l.upper & 0x7fff)
     42 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
     43 #define MANTD(fp)       (fp.l.lower)
     44 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
     45 
     46 #define floatx80_lg2 make_floatx80( 0x3ffd, 0x9a209a84fbcff799LL )
     47 #define floatx80_l2e make_floatx80( 0x3fff, 0xb8aa3b295c17f0bcLL )
     48 #define floatx80_l2t make_floatx80( 0x4000, 0xd49a784bcd1b8afeLL )
     49 
     50 static const floatx80 f15rk[7] =
     51 {
     52     floatx80_zero,
     53     floatx80_one,
     54     floatx80_pi,
     55     floatx80_lg2,
     56     floatx80_ln2,
     57     floatx80_l2e,
     58     floatx80_l2t,
     59 };
     60 
     61 static inline void fpush(CPUX86State *env)
     62 {
     63     env->fpstt = (env->fpstt - 1) & 7;
     64     env->fptags[env->fpstt] = 0; /* validate stack entry */
     65 }
     66 
     67 static inline void fpop(CPUX86State *env)
     68 {
     69     env->fptags[env->fpstt] = 1; /* invvalidate stack entry */
     70     env->fpstt = (env->fpstt + 1) & 7;
     71 }
     72 
     73 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr)
     74 {
     75     floatx80 temp;
     76 
     77     temp.low = cpu_ldq_data(env, ptr);
     78     temp.high = cpu_lduw_data(env, ptr + 8);
     79     return temp;
     80 }
     81 
     82 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr)
     83 {
     84     cpu_stq_data(env, ptr, f.low);
     85     cpu_stw_data(env, ptr + 8, f.high);
     86 }
     87 
     88 #define FPUS_IE (1 << 0)
     89 #define FPUS_DE (1 << 1)
     90 #define FPUS_ZE (1 << 2)
     91 #define FPUS_OE (1 << 3)
     92 #define FPUS_UE (1 << 4)
     93 #define FPUS_PE (1 << 5)
     94 #define FPUS_SF (1 << 6)
     95 #define FPUS_SE (1 << 7)
     96 #define FPUS_B  (1 << 15)
     97 
     98 #define FPUC_EM 0x3f
     99 
    100 /* x87 FPU helpers */
    101 
    102 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
    103 {
    104     union {
    105         float64 f64;
    106         double d;
    107     } u;
    108 
    109     u.f64 = floatx80_to_float64(a, &env->fp_status);
    110     return u.d;
    111 }
    112 
    113 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
    114 {
    115     union {
    116         float64 f64;
    117         double d;
    118     } u;
    119 
    120     u.d = a;
    121     return float64_to_floatx80(u.f64, &env->fp_status);
    122 }
    123 
    124 static void fpu_set_exception(CPUX86State *env, int mask)
    125 {
    126     env->fpus |= mask;
    127     if (env->fpus & (~env->fpuc & FPUC_EM))
    128         env->fpus |= FPUS_SE | FPUS_B;
    129 }
    130 
    131 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
    132 {
    133     if (floatx80_is_zero(b)) {
    134         fpu_set_exception(env, FPUS_ZE);
    135     }
    136     return floatx80_div(a, b, &env->fp_status);
    137 }
    138 
    139 static void fpu_raise_exception(CPUX86State *env)
    140 {
    141     if (env->cr[0] & CR0_NE_MASK) {
    142         raise_exception(env, EXCP10_COPR);
    143     }
    144 #if !defined(CONFIG_USER_ONLY)
    145     else {
    146         cpu_set_ferr(env);
    147     }
    148 #endif
    149 }
    150 
    151 void helper_flds_FT0(CPUX86State *env, uint32_t val)
    152 {
    153     union {
    154         float32 f;
    155         uint32_t i;
    156     } u;
    157     u.i = val;
    158     FT0 = float32_to_floatx80(u.f, &env->fp_status);
    159 }
    160 
    161 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
    162 {
    163     union {
    164         float64 f;
    165         uint64_t i;
    166     } u;
    167     u.i = val;
    168     FT0 = float64_to_floatx80(u.f, &env->fp_status);
    169 }
    170 
    171 void helper_fildl_FT0(CPUX86State *env, int32_t val)
    172 {
    173     FT0 = int32_to_floatx80(val, &env->fp_status);
    174 }
    175 
    176 void helper_flds_ST0(CPUX86State *env, uint32_t val)
    177 {
    178     int new_fpstt;
    179     union {
    180         float32 f;
    181         uint32_t i;
    182     } u;
    183     new_fpstt = (env->fpstt - 1) & 7;
    184     u.i = val;
    185     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    186     env->fpstt = new_fpstt;
    187     env->fptags[new_fpstt] = 0; /* validate stack entry */
    188 }
    189 
    190 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
    191 {
    192     int new_fpstt;
    193     union {
    194         float64 f;
    195         uint64_t i;
    196     } u;
    197     new_fpstt = (env->fpstt - 1) & 7;
    198     u.i = val;
    199     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    200     env->fpstt = new_fpstt;
    201     env->fptags[new_fpstt] = 0; /* validate stack entry */
    202 }
    203 
    204 void helper_fildl_ST0(CPUX86State *env, int32_t val)
    205 {
    206     int new_fpstt;
    207     new_fpstt = (env->fpstt - 1) & 7;
    208     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    209     env->fpstt = new_fpstt;
    210     env->fptags[new_fpstt] = 0; /* validate stack entry */
    211 }
    212 
    213 void helper_fildll_ST0(CPUX86State *env, int64_t val)
    214 {
    215     int new_fpstt;
    216     new_fpstt = (env->fpstt - 1) & 7;
    217     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    218     env->fpstt = new_fpstt;
    219     env->fptags[new_fpstt] = 0; /* validate stack entry */
    220 }
    221 
    222 uint32_t helper_fsts_ST0(CPUX86State *env)
    223 {
    224     union {
    225         float32 f;
    226         uint32_t i;
    227     } u;
    228     u.f = floatx80_to_float32(ST0, &env->fp_status);
    229     return u.i;
    230 }
    231 
    232 uint64_t helper_fstl_ST0(CPUX86State *env)
    233 {
    234     union {
    235         float64 f;
    236         uint64_t i;
    237     } u;
    238     u.f = floatx80_to_float64(ST0, &env->fp_status);
    239     return u.i;
    240 }
    241 
    242 int32_t helper_fist_ST0(CPUX86State *env)
    243 {
    244     int32_t val;
    245     val = floatx80_to_int32(ST0, &env->fp_status);
    246     if (val != (int16_t)val)
    247         val = -32768;
    248     return val;
    249 }
    250 
    251 int32_t helper_fistl_ST0(CPUX86State *env)
    252 {
    253     int32_t val;
    254     val = floatx80_to_int32(ST0, &env->fp_status);
    255     return val;
    256 }
    257 
    258 int64_t helper_fistll_ST0(CPUX86State *env)
    259 {
    260     int64_t val;
    261     val = floatx80_to_int64(ST0, &env->fp_status);
    262     return val;
    263 }
    264 
    265 int32_t helper_fistt_ST0(CPUX86State *env)
    266 {
    267     int32_t val;
    268     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    269     if (val != (int16_t)val)
    270         val = -32768;
    271     return val;
    272 }
    273 
    274 int32_t helper_fisttl_ST0(CPUX86State *env)
    275 {
    276     int32_t val;
    277     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    278     return val;
    279 }
    280 
    281 int64_t helper_fisttll_ST0(CPUX86State *env)
    282 {
    283     int64_t val;
    284     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    285     return val;
    286 }
    287 
    288 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
    289 {
    290     int new_fpstt;
    291     new_fpstt = (env->fpstt - 1) & 7;
    292     env->fpregs[new_fpstt].d = helper_fldt(env, ptr);
    293     env->fpstt = new_fpstt;
    294     env->fptags[new_fpstt] = 0; /* validate stack entry */
    295 }
    296 
    297 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
    298 {
    299     helper_fstt(env, ST0, ptr);
    300 }
    301 
    302 void helper_fpush(CPUX86State *env)
    303 {
    304     fpush(env);
    305 }
    306 
    307 void helper_fpop(CPUX86State *env)
    308 {
    309     fpop(env);
    310 }
    311 
    312 void helper_fdecstp(CPUX86State *env)
    313 {
    314     env->fpstt = (env->fpstt - 1) & 7;
    315     env->fpus &= (~0x4700);
    316 }
    317 
    318 void helper_fincstp(CPUX86State *env)
    319 {
    320     env->fpstt = (env->fpstt + 1) & 7;
    321     env->fpus &= (~0x4700);
    322 }
    323 
    324 /* FPU move */
    325 
    326 void helper_ffree_STN(CPUX86State *env, int st_index)
    327 {
    328     env->fptags[(env->fpstt + st_index) & 7] = 1;
    329 }
    330 
    331 void helper_fmov_ST0_FT0(CPUX86State *env)
    332 {
    333     ST0 = FT0;
    334 }
    335 
    336 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
    337 {
    338     FT0 = ST(st_index);
    339 }
    340 
    341 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
    342 {
    343     ST0 = ST(st_index);
    344 }
    345 
    346 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
    347 {
    348     ST(st_index) = ST0;
    349 }
    350 
    351 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
    352 {
    353     floatx80 tmp;
    354     tmp = ST(st_index);
    355     ST(st_index) = ST0;
    356     ST0 = tmp;
    357 }
    358 
    359 /* FPU operations */
    360 
    361 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
    362 
    363 void helper_fcom_ST0_FT0(CPUX86State *env)
    364 {
    365     int ret;
    366 
    367     ret = floatx80_compare(ST0, FT0, &env->fp_status);
    368     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    369 }
    370 
    371 void helper_fucom_ST0_FT0(CPUX86State *env)
    372 {
    373     int ret;
    374 
    375     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    376     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1];
    377 }
    378 
    379 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
    380 
    381 void helper_fcomi_ST0_FT0(CPUX86State *env)
    382 {
    383     int eflags;
    384     int ret;
    385 
    386     ret = floatx80_compare(ST0, FT0, &env->fp_status);
    387     eflags = helper_cc_compute_all(env, CC_OP);
    388     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    389     CC_SRC = eflags;
    390 }
    391 
    392 void helper_fucomi_ST0_FT0(CPUX86State *env)
    393 {
    394     int eflags;
    395     int ret;
    396 
    397     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    398     eflags = helper_cc_compute_all(env, CC_OP);
    399     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    400     CC_SRC = eflags;
    401 }
    402 
    403 void helper_fadd_ST0_FT0(CPUX86State *env)
    404 {
    405     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    406 }
    407 
    408 void helper_fmul_ST0_FT0(CPUX86State *env)
    409 {
    410     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    411 }
    412 
    413 void helper_fsub_ST0_FT0(CPUX86State *env)
    414 {
    415     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    416 }
    417 
    418 void helper_fsubr_ST0_FT0(CPUX86State *env)
    419 {
    420     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    421 }
    422 
    423 void helper_fdiv_ST0_FT0(CPUX86State *env)
    424 {
    425     ST0 = helper_fdiv(env, ST0, FT0);
    426 }
    427 
    428 void helper_fdivr_ST0_FT0(CPUX86State *env)
    429 {
    430     ST0 = helper_fdiv(env, FT0, ST0);
    431 }
    432 
    433 /* fp operations between STN and ST0 */
    434 
    435 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
    436 {
    437     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    438 }
    439 
    440 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
    441 {
    442     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    443 }
    444 
    445 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
    446 {
    447     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    448 }
    449 
    450 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
    451 {
    452     floatx80 *p;
    453     p = &ST(st_index);
    454     *p = floatx80_sub(ST0, *p, &env->fp_status);
    455 }
    456 
    457 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
    458 {
    459     floatx80 *p;
    460     p = &ST(st_index);
    461     *p = helper_fdiv(env, *p, ST0);
    462 }
    463 
    464 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
    465 {
    466     floatx80 *p;
    467     p = &ST(st_index);
    468     *p = helper_fdiv(env, ST0, *p);
    469 }
    470 
    471 /* misc FPU operations */
    472 void helper_fchs_ST0(CPUX86State *env)
    473 {
    474     ST0 = floatx80_chs(ST0);
    475 }
    476 
    477 void helper_fabs_ST0(CPUX86State *env)
    478 {
    479     ST0 = floatx80_abs(ST0);
    480 }
    481 
    482 void helper_fld1_ST0(CPUX86State *env)
    483 {
    484     ST0 = f15rk[1];
    485 }
    486 
    487 void helper_fldl2t_ST0(CPUX86State *env)
    488 {
    489     ST0 = f15rk[6];
    490 }
    491 
    492 void helper_fldl2e_ST0(CPUX86State *env)
    493 {
    494     ST0 = f15rk[5];
    495 }
    496 
    497 void helper_fldpi_ST0(CPUX86State *env)
    498 {
    499     ST0 = f15rk[2];
    500 }
    501 
    502 void helper_fldlg2_ST0(CPUX86State *env)
    503 {
    504     ST0 = f15rk[3];
    505 }
    506 
    507 void helper_fldln2_ST0(CPUX86State *env)
    508 {
    509     ST0 = f15rk[4];
    510 }
    511 
    512 void helper_fldz_ST0(CPUX86State *env)
    513 {
    514     ST0 = f15rk[0];
    515 }
    516 
    517 void helper_fldz_FT0(CPUX86State *env)
    518 {
    519     FT0 = f15rk[0];
    520 }
    521 
    522 uint32_t helper_fnstsw(CPUX86State *env)
    523 {
    524     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    525 }
    526 
    527 uint32_t helper_fnstcw(CPUX86State *env)
    528 {
    529     return env->fpuc;
    530 }
    531 
    532 static void update_fp_status(CPUX86State *env)
    533 {
    534     int rnd_type;
    535 
    536     /* set rounding mode */
    537     switch(env->fpuc & RC_MASK) {
    538     default:
    539     case RC_NEAR:
    540         rnd_type = float_round_nearest_even;
    541         break;
    542     case RC_DOWN:
    543         rnd_type = float_round_down;
    544         break;
    545     case RC_UP:
    546         rnd_type = float_round_up;
    547         break;
    548     case RC_CHOP:
    549         rnd_type = float_round_to_zero;
    550         break;
    551     }
    552     set_float_rounding_mode(rnd_type, &env->fp_status);
    553     switch((env->fpuc >> 8) & 3) {
    554     case 0:
    555         rnd_type = 32;
    556         break;
    557     case 2:
    558         rnd_type = 64;
    559         break;
    560     case 3:
    561     default:
    562         rnd_type = 80;
    563         break;
    564     }
    565     set_floatx80_rounding_precision(rnd_type, &env->fp_status);
    566 }
    567 
    568 void helper_fldcw(CPUX86State *env, uint32_t val)
    569 {
    570     env->fpuc = val;
    571     update_fp_status(env);
    572 }
    573 
    574 void helper_fclex(CPUX86State *env)
    575 {
    576     env->fpus &= 0x7f00;
    577 }
    578 
    579 void helper_fwait(CPUX86State *env)
    580 {
    581     if (env->fpus & FPUS_SE)
    582         fpu_raise_exception(env);
    583 }
    584 
    585 void helper_fninit(CPUX86State *env)
    586 {
    587     env->fpus = 0;
    588     env->fpstt = 0;
    589     env->fpuc = 0x37f;
    590     env->fptags[0] = 1;
    591     env->fptags[1] = 1;
    592     env->fptags[2] = 1;
    593     env->fptags[3] = 1;
    594     env->fptags[4] = 1;
    595     env->fptags[5] = 1;
    596     env->fptags[6] = 1;
    597     env->fptags[7] = 1;
    598 }
    599 
    600 /* BCD ops */
    601 
    602 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
    603 {
    604     floatx80 tmp;
    605     uint64_t val;
    606     unsigned int v;
    607     int i;
    608 
    609     val = 0;
    610     for(i = 8; i >= 0; i--) {
    611         v = cpu_ldub_data(env, ptr + i);
    612         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    613     }
    614     tmp = int64_to_floatx80(val, &env->fp_status);
    615     if (cpu_ldub_data(env, ptr + 9) & 0x80) {
    616         floatx80_chs(tmp);
    617     }
    618     fpush(env);
    619     ST0 = tmp;
    620 }
    621 
    622 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
    623 {
    624     int v;
    625     target_ulong mem_ref, mem_end;
    626     int64_t val;
    627 
    628     val = floatx80_to_int64(ST0, &env->fp_status);
    629     mem_ref = ptr;
    630     mem_end = mem_ref + 9;
    631     if (val < 0) {
    632         cpu_stb_data(env, mem_end, 0x80);
    633         val = -val;
    634     } else {
    635         cpu_stb_data(env, mem_end, 0x00);
    636     }
    637     while (mem_ref < mem_end) {
    638         if (val == 0)
    639             break;
    640         v = val % 100;
    641         val = val / 100;
    642         v = ((v / 10) << 4) | (v % 10);
    643         cpu_stb_data(env, mem_ref++, v);
    644     }
    645     while (mem_ref < mem_end) {
    646         cpu_stb_data(env, mem_ref++, 0);
    647     }
    648 }
    649 
    650 void helper_f2xm1(CPUX86State *env)
    651 {
    652     double val = floatx80_to_double(env, ST0);
    653     val = pow(2.0, val) - 1.0;
    654     ST0 = double_to_floatx80(env, val);
    655 }
    656 
    657 void helper_fyl2x(CPUX86State *env)
    658 {
    659     double fptemp = floatx80_to_double(env, ST0);
    660 
    661     if (fptemp>0.0){
    662         fptemp = log(fptemp)/log(2.0);   /* log2(ST) */
    663         fptemp *= floatx80_to_double(env, ST1);
    664         ST1 = double_to_floatx80(env, fptemp);
    665         fpop(env);
    666     } else {
    667         env->fpus &= (~0x4700);
    668         env->fpus |= 0x400;
    669     }
    670 }
    671 
    672 void helper_fptan(CPUX86State *env)
    673 {
    674     double fptemp = floatx80_to_double(env, ST0);
    675 
    676     if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
    677         env->fpus |= 0x400;
    678     } else {
    679         fptemp = tan(fptemp);
    680         ST0 = double_to_floatx80(env, fptemp);
    681         fpush(env);
    682         ST0 = floatx80_one;
    683         env->fpus &= (~0x400);  /* C2 <-- 0 */
    684         /* the above code is for  |arg| < 2**52 only */
    685     }
    686 }
    687 
    688 void helper_fpatan(CPUX86State *env)
    689 {
    690     double fptemp, fpsrcop;
    691 
    692     fpsrcop = floatx80_to_double(env, ST1);
    693     fptemp = floatx80_to_double(env, ST0);
    694     ST1 = double_to_floatx80(env, atan2(fpsrcop,fptemp));
    695     fpop(env);
    696 }
    697 
    698 void helper_fxtract(CPUX86State *env)
    699 {
    700     CPU_LDoubleU temp;
    701     unsigned int expdif;
    702 
    703     temp.d = ST0;
    704     expdif = EXPD(temp) - EXPBIAS;
    705     /*DP exponent bias*/
    706     ST0 = int32_to_floatx80(expdif, &env->fp_status);
    707     fpush(env);
    708     BIASEXPONENT(temp);
    709     ST0 = temp.d;
    710 }
    711 
    712 void helper_fprem1(CPUX86State *env)
    713 {
    714     double st0, st1, dblq, fpsrcop, fptemp;
    715     CPU_LDoubleU fpsrcop1, fptemp1;
    716     int expdif;
    717     signed long long int q;
    718 
    719     st0 = floatx80_to_double(env, ST0);
    720     st1 = floatx80_to_double(env, ST1);
    721 
    722     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
    723         ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
    724         env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
    725         return;
    726     }
    727 
    728     fpsrcop = st0;
    729     fptemp = st1;
    730     fpsrcop1.d = ST0;
    731     fptemp1.d = ST1;
    732     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
    733 
    734     if (expdif < 0) {
    735         /* optimisation? taken from the AMD docs */
    736         env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
    737         /* ST0 is unchanged */
    738         return;
    739     }
    740 
    741     if (expdif < 53) {
    742         dblq = fpsrcop / fptemp;
    743         /* round dblq towards nearest integer */
    744         dblq = rint(dblq);
    745         st0 = fpsrcop - fptemp * dblq;
    746 
    747         /* convert dblq to q by truncating towards zero */
    748         if (dblq < 0.0)
    749            q = (signed long long int)(-dblq);
    750         else
    751            q = (signed long long int)dblq;
    752 
    753         env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
    754                                 /* (C0,C3,C1) <-- (q2,q1,q0) */
    755         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
    756         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
    757         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    758     } else {
    759         env->fpus |= 0x400;  /* C2 <-- 1 */
    760         fptemp = pow(2.0, expdif - 50);
    761         fpsrcop = (st0 / st1) / fptemp;
    762         /* fpsrcop = integer obtained by chopping */
    763         fpsrcop = (fpsrcop < 0.0) ?
    764                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
    765         st0 -= (st1 * fpsrcop * fptemp);
    766     }
    767     ST0 = double_to_floatx80(env, st0);
    768 }
    769 
    770 void helper_fprem(CPUX86State *env)
    771 {
    772     double st0, st1, dblq, fpsrcop, fptemp;
    773     CPU_LDoubleU fpsrcop1, fptemp1;
    774     int expdif;
    775     signed long long int q;
    776 
    777     st0 = floatx80_to_double(env, ST0);
    778     st1 = floatx80_to_double(env, ST1);
    779 
    780     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
    781        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
    782        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
    783        return;
    784     }
    785 
    786     fpsrcop = st0;
    787     fptemp = st1;
    788     fpsrcop1.d = ST0;
    789     fptemp1.d = ST1;
    790     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
    791 
    792     if (expdif < 0) {
    793         /* optimisation? taken from the AMD docs */
    794         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    795         /* ST0 is unchanged */
    796         return;
    797     }
    798 
    799     if (expdif < 53) {
    800         dblq = fpsrcop / fptemp; /* ST0 / ST1*/;
    801         /* round dblq towards zero */
    802         dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
    803         st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
    804 
    805         /* convert dblq to q by truncating towards zero */
    806         if (dblq < 0.0) {
    807            q = (signed long long int)(-dblq);
    808         } else {
    809            q = (signed long long int)dblq;
    810         }
    811 
    812         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    813                               /* (C0,C3,C1) <-- (q2,q1,q0) */
    814         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
    815         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
    816         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    817     } else {
    818         int N = 32 + (expdif % 32); /* as per AMD docs */
    819         env->fpus |= 0x400;  /* C2 <-- 1 */
    820         fptemp = pow(2.0, (double)(expdif - N));
    821         fpsrcop = (st0 / st1) / fptemp;
    822         /* fpsrcop = integer obtained by chopping */
    823         fpsrcop = (fpsrcop < 0.0) ?
    824                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
    825         st0 -= (st1 * fpsrcop * fptemp);
    826     }
    827     ST0 = double_to_floatx80(env, st0);
    828 }
    829 
    830 void helper_fyl2xp1(CPUX86State *env)
    831 {
    832     double fptemp = floatx80_to_double(env, ST0);
    833 
    834     if ((fptemp+1.0)>0.0) {
    835         fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
    836         fptemp *= floatx80_to_double(env, ST1);
    837         ST1 = double_to_floatx80(env, fptemp);
    838         fpop(env);
    839     } else {
    840         env->fpus &= (~0x4700);
    841         env->fpus |= 0x400;
    842     }
    843 }
    844 
    845 void helper_fsqrt(CPUX86State *env)
    846 {
    847     double fptemp = floatx80_to_double(env, ST0);
    848 
    849     if (fptemp<0.0) {
    850         env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
    851         env->fpus |= 0x400;
    852     }
    853     ST0 = floatx80_sqrt(ST0, &env->fp_status);
    854 }
    855 
    856 void helper_fsincos(CPUX86State *env)
    857 {
    858     double fptemp = floatx80_to_double(env, ST0);
    859 
    860     if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
    861         env->fpus |= 0x400;
    862     } else {
    863         ST0 = double_to_floatx80(env, sin(fptemp));
    864         fpush(env);
    865         ST0 = double_to_floatx80(env, cos(fptemp));
    866         env->fpus &= (~0x400);  /* C2 <-- 0 */
    867         /* the above code is for  |arg| < 2**63 only */
    868     }
    869 }
    870 
    871 void helper_frndint(CPUX86State *env)
    872 {
    873     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    874 }
    875 
    876 void helper_fscale(CPUX86State *env)
    877 {
    878     double st0 = floatx80_to_double(env, ST0);
    879     double st1 = floatx80_to_double(env, ST1);
    880     double val = ldexp(st0, (int)st1);
    881     ST0 = double_to_floatx80(env, val);
    882 }
    883 
    884 void helper_fsin(CPUX86State *env)
    885 {
    886     double fptemp = floatx80_to_double(env, ST0);
    887 
    888     if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
    889         env->fpus |= 0x400;
    890     } else {
    891         ST0 = double_to_floatx80(env, sin(fptemp));
    892         env->fpus &= (~0x400);  /* C2 <-- 0 */
    893         /* the above code is for  |arg| < 2**53 only */
    894     }
    895 }
    896 
    897 void helper_fcos(CPUX86State *env)
    898 {
    899     double fptemp = floatx80_to_double(env, ST0);
    900 
    901     if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
    902         env->fpus |= 0x400;
    903     } else {
    904         ST0 = double_to_floatx80(env, cos(fptemp));
    905         env->fpus &= (~0x400);  /* C2 <-- 0 */
    906         /* the above code is for  |arg5 < 2**63 only */
    907     }
    908 }
    909 
    910 void helper_fxam_ST0(CPUX86State *env)
    911 {
    912     CPU_LDoubleU temp;
    913     int expdif;
    914 
    915     temp.d = ST0;
    916 
    917     env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
    918     if (SIGND(temp))
    919         env->fpus |= 0x200; /* C1 <-- 1 */
    920 
    921     /* XXX: test fptags too */
    922     expdif = EXPD(temp);
    923     if (expdif == MAXEXPD) {
    924         if (MANTD(temp) == 0x8000000000000000ULL) {
    925             env->fpus |=  0x500 /*Infinity*/;
    926         } else {
    927             env->fpus |=  0x100 /*NaN*/;
    928         }
    929     } else if (expdif == 0) {
    930         if (MANTD(temp) == 0) {
    931             env->fpus |=  0x4000 /*Zero*/;
    932         } else {
    933             env->fpus |= 0x4400 /*Denormal*/;
    934         }
    935     } else {
    936         env->fpus |= 0x400;
    937     }
    938 }
    939 
    940 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
    941 {
    942     int fpus, fptag, exp, i;
    943     uint64_t mant;
    944     CPU_LDoubleU tmp;
    945 
    946     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    947     fptag = 0;
    948     for (i=7; i>=0; i--) {
    949         fptag <<= 2;
    950         if (env->fptags[i]) {
    951             fptag |= 3;
    952         } else {
    953             tmp.d = env->fpregs[i].d;
    954             exp = EXPD(tmp);
    955             mant = MANTD(tmp);
    956             if (exp == 0 && mant == 0) {
    957                 /* zero */
    958                 fptag |= 1;
    959             } else if (exp == 0 || exp == MAXEXPD
    960                        || (mant & (1LL << 63)) == 0) {
    961                 /* NaNs, infinity, denormal */
    962                 fptag |= 2;
    963             }
    964         }
    965     }
    966     if (data32) {
    967         /* 32 bit */
    968         cpu_stl_data(env, ptr, env->fpuc);
    969         cpu_stl_data(env, ptr + 4, fpus);
    970         cpu_stl_data(env, ptr + 8, fptag);
    971         cpu_stl_data(env, ptr + 12, 0); /* fpip */
    972         cpu_stl_data(env, ptr + 16, 0); /* fpcs */
    973         cpu_stl_data(env, ptr + 20, 0); /* fpoo */
    974         cpu_stl_data(env, ptr + 24, 0); /* fpos */
    975     } else {
    976         /* 16 bit */
    977         cpu_stw_data(env, ptr, env->fpuc);
    978         cpu_stw_data(env, ptr + 2, fpus);
    979         cpu_stw_data(env, ptr + 4, fptag);
    980         cpu_stw_data(env, ptr + 6, 0);
    981         cpu_stw_data(env, ptr + 8, 0);
    982         cpu_stw_data(env, ptr + 10, 0);
    983         cpu_stw_data(env, ptr + 12, 0);
    984     }
    985 }
    986 
    987 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
    988 {
    989     int i, fpus, fptag;
    990 
    991     if (data32) {
    992         env->fpuc = cpu_lduw_data(env, ptr);
    993         fpus = cpu_lduw_data(env, ptr + 4);
    994         fptag = cpu_lduw_data(env, ptr + 8);
    995     }
    996     else {
    997         env->fpuc = cpu_lduw_data(env, ptr);
    998         fpus = cpu_lduw_data(env, ptr + 2);
    999         fptag = cpu_lduw_data(env, ptr + 4);
   1000     }
   1001     env->fpstt = (fpus >> 11) & 7;
   1002     env->fpus = fpus & ~0x3800;
   1003     for(i = 0;i < 8; i++) {
   1004         env->fptags[i] = ((fptag & 3) == 3);
   1005         fptag >>= 2;
   1006     }
   1007 }
   1008 
   1009 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
   1010 {
   1011     floatx80 tmp;
   1012     int i;
   1013 
   1014     helper_fstenv(env, ptr, data32);
   1015 
   1016     ptr += (14 << data32);
   1017     for(i = 0;i < 8; i++) {
   1018         tmp = ST(i);
   1019         helper_fstt(env, tmp, ptr);
   1020         ptr += 10;
   1021     }
   1022 
   1023     /* fninit */
   1024     env->fpus = 0;
   1025     env->fpstt = 0;
   1026     env->fpuc = 0x37f;
   1027     env->fptags[0] = 1;
   1028     env->fptags[1] = 1;
   1029     env->fptags[2] = 1;
   1030     env->fptags[3] = 1;
   1031     env->fptags[4] = 1;
   1032     env->fptags[5] = 1;
   1033     env->fptags[6] = 1;
   1034     env->fptags[7] = 1;
   1035 }
   1036 
   1037 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
   1038 {
   1039     floatx80 tmp;
   1040     int i;
   1041 
   1042     helper_fldenv(env, ptr, data32);
   1043     ptr += (14 << data32);
   1044 
   1045     for(i = 0;i < 8; i++) {
   1046         tmp = helper_fldt(env, ptr);
   1047         ST(i) = tmp;
   1048         ptr += 10;
   1049     }
   1050 }
   1051 
   1052 void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
   1053 {
   1054     int fpus, fptag, i, nb_xmm_regs;
   1055     floatx80 tmp;
   1056     target_ulong addr;
   1057 
   1058     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
   1059     fptag = 0;
   1060     for(i = 0; i < 8; i++) {
   1061         fptag |= (env->fptags[i] << i);
   1062     }
   1063     cpu_stw_data(env, ptr, env->fpuc);
   1064     cpu_stw_data(env, ptr + 2, fpus);
   1065     cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
   1066 #ifdef TARGET_X86_64
   1067     if (data64) {
   1068         cpu_stq_data(env, ptr + 0x08, 0); /* rip */
   1069         cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
   1070     } else
   1071 #endif
   1072     {
   1073         cpu_stl_data(env, ptr + 0x08, 0); /* eip */
   1074         cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
   1075         cpu_stl_data(env, ptr + 0x10, 0); /* dp */
   1076         cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
   1077     }
   1078 
   1079     addr = ptr + 0x20;
   1080     for(i = 0;i < 8; i++) {
   1081         tmp = ST(i);
   1082         helper_fstt(env, tmp, addr);
   1083         addr += 16;
   1084     }
   1085 
   1086     if (env->cr[4] & CR4_OSFXSR_MASK) {
   1087         /* XXX: finish it */
   1088         cpu_stl_data(env, ptr + 0x18, env->mxcsr); /* mxcsr */
   1089         cpu_stl_data(env, ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
   1090         if (env->hflags & HF_CS64_MASK)
   1091             nb_xmm_regs = 16;
   1092         else
   1093             nb_xmm_regs = 8;
   1094         addr = ptr + 0xa0;
   1095         /* Fast FXSAVE leaves out the XMM registers */
   1096         if (!(env->efer & MSR_EFER_FFXSR)
   1097           || (env->hflags & HF_CPL_MASK)
   1098           || !(env->hflags & HF_LMA_MASK)) {
   1099             for(i = 0; i < nb_xmm_regs; i++) {
   1100                 cpu_stq_data(env, addr, env->xmm_regs[i].XMM_Q(0));
   1101                 cpu_stq_data(env, addr + 8, env->xmm_regs[i].XMM_Q(1));
   1102                 addr += 16;
   1103             }
   1104         }
   1105     }
   1106 }
   1107 
   1108 void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
   1109 {
   1110     int i, fpus, fptag, nb_xmm_regs;
   1111     floatx80 tmp;
   1112     target_ulong addr;
   1113 
   1114     env->fpuc = cpu_lduw_data(env, ptr);
   1115     fpus = cpu_lduw_data(env, ptr + 2);
   1116     fptag = cpu_lduw_data(env, ptr + 4);
   1117     env->fpstt = (fpus >> 11) & 7;
   1118     env->fpus = fpus & ~0x3800;
   1119     fptag ^= 0xff;
   1120     for(i = 0;i < 8; i++) {
   1121         env->fptags[i] = ((fptag >> i) & 1);
   1122     }
   1123 
   1124     addr = ptr + 0x20;
   1125     for(i = 0;i < 8; i++) {
   1126         tmp = helper_fldt(env, addr);
   1127         ST(i) = tmp;
   1128         addr += 16;
   1129     }
   1130 
   1131     if (env->cr[4] & CR4_OSFXSR_MASK) {
   1132         /* XXX: finish it */
   1133         env->mxcsr = cpu_ldl_data(env, ptr + 0x18);
   1134         //ldl(ptr + 0x1c);
   1135         if (env->hflags & HF_CS64_MASK)
   1136             nb_xmm_regs = 16;
   1137         else
   1138             nb_xmm_regs = 8;
   1139         addr = ptr + 0xa0;
   1140         /* Fast FXRESTORE leaves out the XMM registers */
   1141         if (!(env->efer & MSR_EFER_FFXSR)
   1142           || (env->hflags & HF_CPL_MASK)
   1143           || !(env->hflags & HF_LMA_MASK)) {
   1144             for(i = 0; i < nb_xmm_regs; i++) {
   1145                 env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data(env, addr);
   1146                 env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data(env, addr + 8);
   1147                 addr += 16;
   1148             }
   1149         }
   1150     }
   1151 }
   1152 
   1153 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
   1154 {
   1155     CPU_LDoubleU temp;
   1156 
   1157     temp.d = f;
   1158     *pmant = temp.l.lower;
   1159     *pexp = temp.l.upper;
   1160 }
   1161 
   1162 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
   1163 {
   1164     CPU_LDoubleU temp;
   1165 
   1166     temp.l.upper = upper;
   1167     temp.l.lower = mant;
   1168     return temp.d;
   1169 }
   1170 
   1171 /* MMX/SSE */
   1172 /* XXX: optimize by storing fptt and fptags in the static cpu state */
   1173 void helper_enter_mmx(CPUX86State *env)
   1174 {
   1175     env->fpstt = 0;
   1176     memset(env->fptags, 0, sizeof(env->fptags));
   1177 }
   1178 
   1179 void helper_emms(CPUX86State *env)
   1180 {
   1181     /* set to empty state */
   1182     memset(env->fptags, 1, sizeof(env->fptags));
   1183 }
   1184 
   1185 /* XXX: suppress */
   1186 void helper_movq(CPUX86State *env, void *d, void *s)
   1187 {
   1188     *(uint64_t *)d = *(uint64_t *)s;
   1189 }
   1190 
   1191 #define SHIFT 0
   1192 #include "ops_sse.h"
   1193 
   1194 #define SHIFT 1
   1195 #include "ops_sse.h"
   1196