      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 /*! \file LowerAlu.cpp
     19     \brief This file lowers ALU bytecodes.
     20 */
     21 #include "libdex/DexOpcodes.h"
     22 #include "libdex/DexFile.h"
     23 #include "Lower.h"
     24 #include "NcgAot.h"
     25 #include "enc_wrapper.h"
     26 
     27 /////////////////////////////////////////////
     28 #define P_GPR_1 PhysicalReg_EBX
     29 //! lower bytecode NEG_INT
     30 
     31 //!
     32 int op_neg_int() {
     33     u2 vA = INST_A(inst); //destination
     34     u2 vB = INST_B(inst);
     35     get_virtual_reg(vB, OpndSize_32, 1, false);
     36     alu_unary_reg(OpndSize_32, neg_opc, 1, false);
     37     set_virtual_reg(vA, OpndSize_32, 1, false);
     38     rPC += 1;
     39     return 0;
     40 }
     41 //! lower bytecode NOT_INT
     42 
     43 //!
     44 int op_not_int() {
     45     u2 vA = INST_A(inst); //destination
     46     u2 vB = INST_B(inst);
     47     get_virtual_reg(vB, OpndSize_32, 1, false);
     48     alu_unary_reg(OpndSize_32, not_opc, 1, false);
     49     set_virtual_reg(vA, OpndSize_32, 1, false);
     50     rPC += 1;
     51     return 0;
     52 }
     53 #undef P_GPR_1
     54 //! lower bytecode NEG_LONG
     55 
     56 //! This implementation uses XMM registers
     57 int op_neg_long() {
     58     u2 vA = INST_A(inst); //destination
     59     u2 vB = INST_B(inst);
     60     get_virtual_reg(vB, OpndSize_64, 1, false);
     61     alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false);
     62     alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false);
     63     set_virtual_reg(vA, OpndSize_64, 2, false);
     64     rPC += 1;
     65     return 0;
     66 }
     67 //! lower bytecode NOT_LONG
     68 
     69 //! This implementation uses XMM registers
     70 int op_not_long() {
     71     u2 vA = INST_A(inst); //destination
     72     u2 vB = INST_B(inst);
     73     get_virtual_reg(vB, OpndSize_64, 1, false);
     74     load_global_data_API("64bits", OpndSize_64, 2, false);
     75     alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false);
     76     set_virtual_reg(vA, OpndSize_64, 1, false);
     77     rPC += 1;
     78     return 0;
     79 }
     80 #define P_GPR_1 PhysicalReg_EBX
     81 //! lower bytecode NEG_FLOAT
     82 
     83 //! This implementation uses GPR
     84 int op_neg_float() {
     85     u2 vA = INST_A(inst); //destination
     86     u2 vB = INST_B(inst);
     87     get_virtual_reg(vB, OpndSize_32, 1, false);
     88     alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false);
     89     set_virtual_reg(vA, OpndSize_32, 1, false);
     90     rPC += 1;
     91     return 0;
     92 }
     93 #undef P_GPR_1
     94 
     95 //! lower bytecode NEG_DOUBLE
     96 
     97 //! This implementation uses XMM registers
     98 int op_neg_double() {
     99     u2 vA = INST_A(inst); //destination
    100     u2 vB = INST_B(inst);
    101     get_virtual_reg(vB, OpndSize_64, 1, false);
    102     load_global_data_API("doubNeg", OpndSize_64, 2, false);
    103     alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false);
    104     set_virtual_reg(vA, OpndSize_64, 2, false);
    105     rPC += 1;
    106     return 0;
    107 }
    108 
    109 //! lower bytecode INT_TO_LONG
    110 
    111 //! It uses native instruction cdq
    112 int op_int_to_long() {
    113     u2 vA = INST_A(inst); //destination
    114     u2 vB = INST_B(inst);
    115     get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    116     convert_integer(OpndSize_32, OpndSize_64);
    117     set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    118     set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true);
    119     rPC += 1;
    120     return 0;
    121 }
    122 //! lower bytecode INT_TO_FLOAT
    123 
    124 //! This implementation uses FP stack
    125 int op_int_to_float() {
    126     u2 vA = INST_A(inst); //destination
    127     u2 vB = INST_B(inst);
    128     load_int_fp_stack_VR(OpndSize_32, vB); //fildl
    129     store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    130     rPC += 1;
    131     return 0;
    132 }
    133 //! lower bytecode INT_TO_DOUBLE
    134 
    135 //! This implementation uses FP stack
    136 int op_int_to_double() {
    137     u2 vA = INST_A(inst); //destination
    138     u2 vB = INST_B(inst);
    139     load_int_fp_stack_VR(OpndSize_32, vB); //fildl
    140     store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    141     rPC += 1;
    142     return 0;
    143 }
    144 //! lower bytecode LONG_TO_FLOAT
    145 
    146 //! This implementation uses FP stack
    147 int op_long_to_float() {
    148     u2 vA = INST_A(inst); //destination
    149     u2 vB = INST_B(inst);
    150     load_int_fp_stack_VR(OpndSize_64, vB); //fildll
    151     store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    152     rPC += 1;
    153     return 0;
    154 }
    155 //! lower bytecode LONG_TO_DOUBLE
    156 
    157 //! This implementation uses FP stack
    158 int op_long_to_double() {
    159     u2 vA = INST_A(inst); //destination
    160     u2 vB = INST_B(inst);
    161     load_int_fp_stack_VR(OpndSize_64, vB); //fildll
    162     store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    163     rPC += 1;
    164     return 0;
    165 }
    166 //! lower bytecode FLOAT_TO_DOUBLE
    167 
    168 //! This implementation uses FP stack
    169 int op_float_to_double() {
    170     u2 vA = INST_A(inst); //destination
    171     u2 vB = INST_B(inst);
    172     load_fp_stack_VR(OpndSize_32, vB); //flds
    173     store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    174     rPC += 1;
    175     return 0;
    176 }
    177 //! lower bytecode DOUBLE_TO_FLOAT
    178 
    179 //! This implementation uses FP stack
    180 int op_double_to_float() {
    181     u2 vA = INST_A(inst); //destination
    182     u2 vB = INST_B(inst);
    183     load_fp_stack_VR(OpndSize_64, vB); //fldl
    184     store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    185     rPC += 1;
    186     return 0;
    187 }
    188 #define P_GPR_1 PhysicalReg_EBX
    189 //! lower bytecode LONG_TO_INT
    190 
    191 //! This implementation uses GPR
    192 int op_long_to_int() {
    193     u2 vA = INST_A(inst); //destination
    194     u2 vB = INST_B(inst);
    195     get_virtual_reg(vB, OpndSize_32, 1, false);
    196     set_virtual_reg(vA, OpndSize_32, 1, false);
    197     rPC += 1;
    198     return 0;
    199 }
    200 #undef P_GPR_1
    201 
    202 //! common code to convert a float or double to integer
    203 
    204 //! It uses FP stack
int common_fp_to_int(bool isDouble, u2 vA, u2 vB) {
    //Convert a float/double VR to a 32-bit int VR on the x87 stack with
    //Java semantics: NaN -> 0, +inf/overflow -> 0x7fffffff,
    //-inf/underflow -> 0x80000000; in-range values truncate toward zero.
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    load_fp_stack_global_data_API("intMax", OpndSize_32);
    load_fp_stack_global_data_API("intMin", OpndSize_32);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2), pops LintMin
    //ST(0) ST(1) --> LintMax value
    conditional_jump(Condition_AE, ".float_to_int_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1), pops LintMax
    //ST(0) --> value
    rememberState(2);
    //Carry set here means value >= LintMax, or unordered (NaN).
    conditional_jump(Condition_C, ".float_to_int_nanInf", true);
    //Normal case: force rounding control to 11b (truncate toward zero).
    //fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    //Undo the or in the saved copy so the original word is restored below.
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl
    //fldcw: restore the original control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_nanInf", true);
    //Parity set means the compare was unordered (NaN); clear means overflow.
    conditional_jump(Condition_NP, ".float_to_int_posInf", true);
    //fstps CHECK
    goToState(2);
    //Pop the value off the FP stack, then write the NaN result (0).
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_posInf", true);
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x7fffffff);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_negInf", true);
    goToState(1);
    //fstps CHECK
    //Two entries (LintMax and the value) are still on the FP stack on this
    //path, so pop twice; set_VR_to_imm then overwrites vA with the result.
    store_fp_stack_VR(true, OpndSize_32, vA);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(3);
    insertLabel(".float_to_int_okay", true);
    return 0;
}
    262 //! lower bytecode FLOAT_TO_INT by calling common_fp_to_int
    263 
    264 //!
    265 int op_float_to_int() {
    266     u2 vA = INST_A(inst); //destination
    267     u2 vB = INST_B(inst);
    268     int retval = common_fp_to_int(false, vA, vB);
    269     rPC += 1;
    270     return retval;
    271 }
    272 //! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int
    273 
    274 //!
    275 int op_double_to_int() {
    276     u2 vA = INST_A(inst); //destination
    277     u2 vB = INST_B(inst);
    278     int retval = common_fp_to_int(true, vA, vB);
    279     rPC += 1;
    280     return retval;
    281 }
    282 
    283 //! common code to convert float or double to long
    284 
    285 //! It uses FP stack
int common_fp_to_long(bool isDouble, u2 vA, u2 vB) {
    //Convert a float/double VR to a 64-bit long VR on the x87 stack with
    //Java semantics: NaN -> 0, +inf/overflow -> LLONG_MAX,
    //-inf/underflow -> LLONG_MIN; in-range values truncate toward zero.
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    //Check if it is the special Negative Infinity value
    load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64);
    //Stack status: ST(0) ST(1) --> LlongMin value
    compare_fp_stack(true, 1, false/*isDouble*/); // Pops ST(1)
    conditional_jump(Condition_AE, ".float_to_long_negInf", true);
    rememberState(1);

    //Check if it is the special Positive Infinity value
    load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64);
    //Stack status: ST(0) ST(1) --> LlongMax value
    compare_fp_stack(true, 1, false/*isDouble*/); // Pops ST(1)
    rememberState(2);
    //Carry set: value >= LlongMax, or the compare was unordered (NaN).
    conditional_jump(Condition_C, ".float_to_long_nanInf", true);

    //Normal Case
    //We want to truncate to 0 for conversion. That will be rounding mode 0x11
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    //Change control word to rounding mode 11:
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    //Load the control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    //Reset the control word
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    //Perform the actual conversion
    store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll
    // Restore the original control word
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    /* NOTE: We do not need to pop out the original value we pushed
     * since load_fpu_cw above already clears the stack for
     * normal values.
     */
    unconditional_jump(".float_to_long_okay", true);

    //We can be here for positive infinity or NaN. Check parity bit
    insertLabel(".float_to_long_nanInf", true);
    conditional_jump(Condition_NP, ".float_to_long_posInf", true);
    goToState(2);
    //Save corresponding Long NaN value (0)
    load_global_data_API("valueNanLong", OpndSize_64, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    transferToState(3);
    //Pop out the original value we pushed (self-compare with pop)
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)
    unconditional_jump(".float_to_long_okay", true);

    insertLabel(".float_to_long_posInf", true);
    goToState(2);
    //Save corresponding Long Positive Infinity value
    load_global_data_API("valuePosInfLong", OpndSize_64, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    transferToState(3);
    //Pop out the original value we pushed
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)
    unconditional_jump(".float_to_long_okay", true);

    insertLabel(".float_to_long_negInf", true);
    //fstpl
    goToState(1);
    //Load corresponding Long Negative Infinity value
    load_global_data_API("valueNegInfLong", OpndSize_64, 3, false);
    set_virtual_reg(vA, OpndSize_64, 3, false);
    transferToState(3);
    //Pop out the original value we pushed
    compare_fp_stack(true, 0, false/*isDouble*/); //ST(0)

    insertLabel(".float_to_long_okay", true);
    return 0;
}
    365 //! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long
    366 
    367 //!
    368 int op_float_to_long() {
    369     u2 vA = INST_A(inst); //destination
    370     u2 vB = INST_B(inst);
    371     int retval = common_fp_to_long(false, vA, vB);
    372     rPC += 1;
    373     return retval;
    374 }
    375 //! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long
    376 
    377 //!
    378 int op_double_to_long() {
    379     u2 vA = INST_A(inst); //destination
    380     u2 vB = INST_B(inst);
    381     int retval = common_fp_to_long(true, vA, vB);
    382     rPC += 1;
    383     return retval;
    384 }
    385 #define P_GPR_1 PhysicalReg_EBX
    386 //! lower bytecode INT_TO_BYTE
    387 
    388 //! It uses GPR
    389 int op_int_to_byte() {
    390     u2 vA = INST_A(inst); //destination
    391     u2 vB = INST_B(inst);
    392     get_virtual_reg(vB, OpndSize_32, 1, false);
    393     alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false);
    394     alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false);
    395     set_virtual_reg(vA, OpndSize_32, 1, false);
    396     rPC += 1;
    397     return 0;
    398 }
    399 //! lower bytecode INT_TO_CHAR
    400 
    401 //! It uses GPR
    402 int op_int_to_char() {
    403     u2 vA = INST_A(inst); //destination
    404     u2 vB = INST_B(inst);
    405     get_virtual_reg(vB, OpndSize_32, 1, false);
    406     alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
    407     alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false);
    408     set_virtual_reg(vA, OpndSize_32, 1, false);
    409     rPC += 1;
    410     return 0;
    411 }
    412 //! lower bytecode INT_TO_SHORT
    413 
    414 //! It uses GPR
    415 int op_int_to_short() {
    416     u2 vA = INST_A(inst); //destination
    417     u2 vB = INST_B(inst);
    418     get_virtual_reg(vB, OpndSize_32, 1, false);
    419     alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
    420     alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false);
    421     set_virtual_reg(vA, OpndSize_32, 1, false);
    422     rPC += 1;
    423     return 0;
    424 }
    425 //! common code to handle integer ALU ops
    426 
    427 //! It uses GPR
int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    //vA = v1 opc v2 for 32-bit VRs, using scratch GPR 1. Division/remainder
    //go through common_div_rem_int and variable shifts through
    //common_shift_int instead.
    get_virtual_reg(v1, OpndSize_32, 1, false);
    //in encoder, reg is first operand, which is the destination
    //gpr_1 op v2(rFP) --> gpr_1
    alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
    437 #undef P_GPR_1
    438 #define P_GPR_1 PhysicalReg_EBX
    439 //! common code to handle integer shift ops
    440 
    441 //! It uses GPR
    442 int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {
    443     get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true);
    444     get_virtual_reg(v1, OpndSize_32, 1, false);
    445     //in encoder, reg2 is first operand, which is the destination
    446     //gpr_1 op v2(rFP) --> gpr_1
    447     //shift only works with reg cl, v2 should be in %ecx
    448     alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false);
    449     set_virtual_reg(vA, OpndSize_32, 1, false);
    450     return 0;
    451 }
    452 #undef p_GPR_1
    453 //! lower bytecode ADD_INT by calling common_alu_int
    454 
    455 //!
    456 int op_add_int() {
    457     u2 vA, v1, v2;
    458     vA = INST_AA(inst);
    459     v1 = *((u1*)rPC + 2);
    460     v2 = *((u1*)rPC + 3);
    461     int retval = common_alu_int(add_opc, vA, v1, v2);
    462     rPC += 2;
    463     return retval;
    464 }
    465 //! lower bytecode SUB_INT by calling common_alu_int
    466 
    467 //!
    468 int op_sub_int() {
    469     u2 vA, v1, v2;
    470     vA = INST_AA(inst);
    471     v1 = *((u1*)rPC + 2);
    472     v2 = *((u1*)rPC + 3);
    473     int retval = common_alu_int(sub_opc, vA, v1, v2);
    474     rPC += 2;
    475     return retval;
    476 }
    477 //! lower bytecode MUL_INT by calling common_alu_int
    478 
    479 //!
    480 int op_mul_int() {
    481     u2 vA, v1, v2;
    482     vA = INST_AA(inst);
    483     v1 = *((u1*)rPC + 2);
    484     v2 = *((u1*)rPC + 3);
    485     int retval = common_alu_int(imul_opc, vA, v1, v2);
    486     rPC += 2;
    487     return retval;
    488 }
    489 //! lower bytecode AND_INT by calling common_alu_int
    490 
    491 //!
    492 int op_and_int() {
    493     u2 vA, v1, v2;
    494     vA = INST_AA(inst);
    495     v1 = *((u1*)rPC + 2);
    496     v2 = *((u1*)rPC + 3);
    497     int retval = common_alu_int(and_opc, vA, v1, v2);
    498     rPC += 2;
    499     return retval;
    500 }
    501 //! lower bytecode OR_INT by calling common_alu_int
    502 
    503 //!
    504 int op_or_int() {
    505     u2 vA, v1, v2;
    506     vA = INST_AA(inst);
    507     v1 = *((u1*)rPC + 2);
    508     v2 = *((u1*)rPC + 3);
    509     int retval = common_alu_int(or_opc, vA, v1, v2);
    510     rPC += 2;
    511     return retval;
    512 }
    513 //! lower bytecode XOR_INT by calling common_alu_int
    514 
    515 //!
    516 int op_xor_int() {
    517     u2 vA, v1, v2;
    518     vA = INST_AA(inst);
    519     v1 = *((u1*)rPC + 2);
    520     v2 = *((u1*)rPC + 3);
    521     int retval = common_alu_int(xor_opc, vA, v1, v2);
    522     rPC += 2;
    523     return retval;
    524 }
    525 //! lower bytecode SHL_INT by calling common_shift_int
    526 
    527 //!
    528 int op_shl_int() {
    529     u2 vA, v1, v2;
    530     vA = INST_AA(inst);
    531     v1 = *((u1*)rPC + 2);
    532     v2 = *((u1*)rPC + 3);
    533     int retval = common_shift_int(shl_opc, vA, v1, v2);
    534     rPC += 2;
    535     return retval;
    536 }
    537 //! lower bytecode SHR_INT by calling common_shift_int
    538 
    539 //!
    540 int op_shr_int() {
    541     u2 vA, v1, v2;
    542     vA = INST_AA(inst);
    543     v1 = *((u1*)rPC + 2);
    544     v2 = *((u1*)rPC + 3);
    545     int retval = common_shift_int(sar_opc, vA, v1, v2);
    546     rPC += 2;
    547     return retval;
    548 }
    549 //! lower bytecode USHR_INT by calling common_shift_int
    550 
    551 //!
    552 int op_ushr_int() {
    553     u2 vA, v1, v2;
    554     vA = INST_AA(inst);
    555     v1 = *((u1*)rPC + 2);
    556     v2 = *((u1*)rPC + 3);
    557     int retval = common_shift_int(shr_opc, vA, v1, v2);
    558     rPC += 2;
    559     return retval;
    560 }
    561 //! lower bytecode ADD_INT_2ADDR by calling common_alu_int
    562 
    563 //!
    564 int op_add_int_2addr() {
    565     u2 vA, v1, v2;
    566     vA = INST_A(inst);
    567     v1 = vA;
    568     v2 = INST_B(inst);
    569     int retval = common_alu_int(add_opc, vA, v1, v2);
    570     rPC += 1;
    571     return retval;
    572 }
    573 //! lower bytecode SUB_INT_2ADDR by calling common_alu_int
    574 
    575 //!
    576 int op_sub_int_2addr() {
    577     u2 vA, v1, v2;
    578     vA = INST_A(inst);
    579     v1 = vA;
    580     v2 = INST_B(inst);
    581     int retval = common_alu_int(sub_opc, vA, v1, v2);
    582     rPC += 1;
    583     return retval;
    584 }
    585 //! lower bytecode MUL_INT_2ADDR by calling common_alu_int
    586 
    587 //!
    588 int op_mul_int_2addr() {
    589     u2 vA, v1, v2;
    590     vA = INST_A(inst);
    591     v1 = vA;
    592     v2 = INST_B(inst);
    593     int retval = common_alu_int(imul_opc, vA, v1, v2);
    594     rPC += 1;
    595     return retval;
    596 }
    597 //! lower bytecode AND_INT_2ADDR by calling common_alu_int
    598 
    599 //!
    600 int op_and_int_2addr() {
    601     u2 vA, v1, v2;
    602     vA = INST_A(inst);
    603     v1 = vA;
    604     v2 = INST_B(inst);
    605     int retval = common_alu_int(and_opc, vA, v1, v2);
    606     rPC += 1;
    607     return retval;
    608 }
    609 //! lower bytecode OR_INT_2ADDR by calling common_alu_int
    610 
    611 //!
    612 int op_or_int_2addr() {
    613     u2 vA, v1, v2;
    614     vA = INST_A(inst);
    615     v1 = vA;
    616     v2 = INST_B(inst);
    617     int retval = common_alu_int(or_opc, vA, v1, v2);
    618     rPC += 1;
    619     return retval;
    620 }
    621 //! lower bytecode XOR_INT_2ADDR by calling common_alu_int
    622 
    623 //!
    624 int op_xor_int_2addr() {
    625     u2 vA, v1, v2;
    626     vA = INST_A(inst);
    627     v1 = vA;
    628     v2 = INST_B(inst);
    629     int retval = common_alu_int(xor_opc, vA, v1, v2);
    630     rPC += 1;
    631     return retval;
    632 }
    633 //! lower bytecode SHL_INT_2ADDR by calling common_shift_int
    634 
    635 //!
    636 int op_shl_int_2addr() {
    637     u2 vA, v1, v2;
    638     vA = INST_A(inst);
    639     v1 = vA;
    640     v2 = INST_B(inst);
    641     int retval = common_shift_int(shl_opc, vA, v1, v2);
    642     rPC += 1;
    643     return retval;
    644 }
    645 //! lower bytecode SHR_INT_2ADDR by calling common_shift_int
    646 
    647 //!
    648 int op_shr_int_2addr() {
    649     u2 vA, v1, v2;
    650     vA = INST_A(inst);
    651     v1 = vA;
    652     v2 = INST_B(inst);
    653     int retval = common_shift_int(sar_opc, vA, v1, v2);
    654     rPC += 1;
    655     return retval;
    656 }
    657 //! lower bytecode USHR_INT_2ADDR by calling common_shift_int
    658 
    659 //!
    660 int op_ushr_int_2addr() {
    661     u2 vA, v1, v2;
    662     vA = INST_A(inst);
    663     v1 = vA;
    664     v2 = INST_B(inst);
    665     int retval = common_shift_int(shr_opc, vA, v1, v2);
    666     rPC += 1;
    667     return retval;
    668 }
    669 #define P_GPR_1 PhysicalReg_EBX
    670 //!common code to handle integer DIV & REM, it used GPR
    671 
    672 //!The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv
    673 //!There are two merge points in the control flow for this bytecode
    674 //!make sure the reg. alloc. state is the same at merge points by calling transferToState
int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) {
    //vA = v1 / v2 (or v1 % v2 when isRem). The dividend goes to EAX for
    //idiv; the divisor goes to scratch GPR 2.
    get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    //Throw ArithmeticException when the divisor is zero.
    compare_imm_reg(OpndSize_32, 0, 2, false);
    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    /////////////////// handle special cases
    //conditional move 0 to $edx for rem for the two special cases
    //conditional move 0x80000000 to $eax for div
    //handle -1 special case divide error
    compare_imm_reg(OpndSize_32, -1, 2, false);
    conditional_jump(Condition_NE, ".common_div_rem_int_normal", true);
    //handle min int special case divide error
    //idiv faults on INT_MIN / -1 (the quotient overflows), so that pair is
    //diverted to the special-case path below.
    rememberState(1);
    compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
    transferToState(1);
    conditional_jump(Condition_E, ".common_div_rem_int_special", true);

    insertLabel(".common_div_rem_int_normal", true); //merge point
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else //divide: quotient in %eax
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    rememberState(2);
    unconditional_jump(".common_div_rem_int_okay", true);

    insertLabel(".common_div_rem_int_special", true);
    goToState(1);
    //Java semantics for INT_MIN / -1: quotient INT_MIN, remainder 0.
    if(isRem)
        set_VR_to_imm(vA, OpndSize_32, 0);
    else
        set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(2);
    insertLabel(".common_div_rem_int_okay", true); //merge point 2
    return 0;
}
    715 #undef P_GPR_1
    716 //! lower bytecode DIV_INT by calling common_div_rem_int
    717 
    718 //!
    719 int op_div_int() {
    720     u2 vA, v1, v2;
    721     vA = INST_AA(inst);
    722     v1 = *((u1*)rPC + 2);
    723     v2 = *((u1*)rPC + 3);
    724     int retval = common_div_rem_int(false, vA, v1, v2);
    725     rPC += 2;
    726     return retval;
    727 }
    728 //! lower bytecode REM_INT by calling common_div_rem_int
    729 
    730 //!
    731 int op_rem_int() {
    732     u2 vA, v1, v2;
    733     vA = INST_AA(inst);
    734     v1 = *((u1*)rPC + 2);
    735     v2 = *((u1*)rPC + 3);
    736     int retval = common_div_rem_int(true, vA, v1, v2);
    737     rPC += 2;
    738     return retval;
    739 }
    740 //! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int
    741 
    742 //!
    743 int op_div_int_2addr() {
    744     u2 vA = INST_A(inst);
    745     u2 v1 = vA;
    746     u2 v2 = INST_B(inst);
    747     int retval = common_div_rem_int(false, vA, v1, v2);
    748     rPC += 1;
    749     return retval;
    750 }
    751 //! lower bytecode REM_INT_2ADDR by calling common_div_rem_int
    752 
    753 //!
    754 int op_rem_int_2addr() {
    755     u2 vA = INST_A(inst);
    756     u2 v1 = vA;
    757     u2 v2 = INST_B(inst);
    758     int retval = common_div_rem_int(true, vA, v1, v2);
    759     rPC += 1;
    760     return retval;
    761 }
    762 
    763 #define P_GPR_1 PhysicalReg_EBX
    764 //! common code to handle integer ALU ops with literal
    765 
    766 //! It uses GPR
int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem
    //vA = vB opc imm (sign-extended literal), using scratch GPR 1.
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
    773 //! calls common_alu_int_lit
    774 int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) {
    775     return common_alu_int_lit(opc, vA, vB, imm);
    776 }
    777 #undef p_GPR_1
    778 //! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit
    779 
    780 //!
    781 int op_add_int_lit16() {
    782     u2 vA = INST_A(inst);
    783     u2 vB = INST_B(inst);
    784     s4 tmp = (s2)FETCH(1);
    785     int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
    786     rPC += 2;
    787     return retval;
    788 }
    789 
int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) {
    //Reverse subtract: vA = imm opc-reversed vB, i.e. imm - vB for sub_opc.
    //The literal is loaded into the destination scratch (2) first so the
    //subtraction runs imm - vB rather than vB - imm.
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    //scratch2 = scratch2 opc scratch1 (imm - vB)
    alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false);
    set_virtual_reg(vA, OpndSize_32, 2, false);
    return 0;
}
    797 
    798 
    799 //! lower bytecode RSUB_INT by calling common_alu_int_lit
    800 
    801 //!
    802 int op_rsub_int() {
    803     u2 vA = INST_A(inst);
    804     u2 vB = INST_B(inst);
    805     s4 tmp = (s2)FETCH(1);
    806     int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
    807     rPC += 2;
    808     return retval;
    809 }
    810 //! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit
    811 
    812 //!
    813 int op_mul_int_lit16() {
    814     u2 vA = INST_A(inst);
    815     u2 vB = INST_B(inst);
    816     s4 tmp = (s2)FETCH(1);
    817     int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
    818     rPC += 2;
    819     return retval;
    820 }
    821 //! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit
    822 
    823 //!
    824 int op_and_int_lit16() {
    825     u2 vA = INST_A(inst);
    826     u2 vB = INST_B(inst);
    827     s4 tmp = (s2)FETCH(1);
    828     int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
    829     rPC += 2;
    830     return retval;
    831 }
    832 //! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit
    833 
    834 //!
    835 int op_or_int_lit16() {
    836     u2 vA = INST_A(inst);
    837     u2 vB = INST_B(inst);
    838     s4 tmp = (s2)FETCH(1);
    839     int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
    840     rPC += 2;
    841     return retval;
    842 }
    843 //! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit
    844 
    845 //!
    846 int op_xor_int_lit16() {
    847     u2 vA = INST_A(inst);
    848     u2 vB = INST_B(inst);
    849     s4 tmp = (s2)FETCH(1);
    850     int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
    851     rPC += 2;
    852     return retval;
    853 }
    854 //! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit
    855 
    856 //!
    857 int op_shl_int_lit16() {
    858     u2 vA = INST_A(inst);
    859     u2 vB = INST_B(inst);
    860     s4 tmp = (s2)FETCH(1);
    861     int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
    862     rPC += 2;
    863     return retval;
    864 }
    865 //! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit
    866 
    867 //!
    868 int op_shr_int_lit16() {
    869     u2 vA = INST_A(inst);
    870     u2 vB = INST_B(inst);
    871     s4 tmp = (s2)FETCH(1);
    872     int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
    873     rPC += 2;
    874     return retval;
    875 }
    876 //! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit
    877 
    878 //!
    879 int op_ushr_int_lit16() {
    880     u2 vA = INST_A(inst);
    881     u2 vB = INST_B(inst);
    882     s4 tmp = (s2)FETCH(1);
    883     int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
    884     rPC += 2;
    885     return retval;
    886 }
    887 //! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit
    888 
    889 //!
    890 int op_add_int_lit8() {
    891     u2 vA = INST_AA(inst);
    892     u2 vB = (u2)FETCH(1) & 0xff;
    893     s2 tmp = (s2)FETCH(1) >> 8;
    894     int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
    895     rPC += 2;
    896     return retval;
    897 }
    898 //! lower bytecode RSUB_INT_LIT8 by calling common_alu_int_lit
    899 
    900 //!
    901 int op_rsub_int_lit8() {
    902     u2 vA = INST_AA(inst);
    903     u2 vB = (u2)FETCH(1) & 0xff;
    904     s2 tmp = (s2)FETCH(1) >> 8;
    905     int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
    906     rPC += 2;
    907     return retval;
    908 }
    909 //! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit
    910 
    911 //!
    912 int op_mul_int_lit8() {
    913     u2 vA = INST_AA(inst);
    914     u2 vB = (u2)FETCH(1) & 0xff;
    915     s2 tmp = (s2)FETCH(1) >> 8;
    916     int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
    917     rPC += 2;
    918     return retval;
    919 }
    920 //! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit
    921 
    922 //!
    923 int op_and_int_lit8() {
    924     u2 vA = INST_AA(inst);
    925     u2 vB = (u2)FETCH(1) & 0xff;
    926     s2 tmp = (s2)FETCH(1) >> 8;
    927     int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
    928     rPC += 2;
    929     return retval;
    930 }
    931 //! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit
    932 
    933 //!
    934 int op_or_int_lit8() {
    935     u2 vA = INST_AA(inst);
    936     u2 vB = (u2)FETCH(1) & 0xff;
    937     s2 tmp = (s2)FETCH(1) >> 8;
    938     int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
    939     rPC += 2;
    940     return retval;
    941 }
    942 //! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit
    943 
    944 //!
    945 int op_xor_int_lit8() {
    946     u2 vA = INST_AA(inst);
    947     u2 vB = (u2)FETCH(1) & 0xff;
    948     s2 tmp = (s2)FETCH(1) >> 8;
    949     int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
    950     rPC += 2;
    951     return retval;
    952 }
    953 //! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit
    954 
    955 //!
    956 int op_shl_int_lit8() {
    957     u2 vA = INST_AA(inst);
    958     u2 vB = (u2)FETCH(1) & 0xff;
    959     s2 tmp = (s2)FETCH(1) >> 8;
    960     int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
    961     rPC += 2;
    962     return retval;
    963 }
    964 //! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit
    965 
    966 //!
    967 int op_shr_int_lit8() {
    968     u2 vA = INST_AA(inst);
    969     u2 vB = (u2)FETCH(1) & 0xff;
    970     s2 tmp = (s2)FETCH(1) >> 8;
    971     int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
    972     rPC += 2;
    973     return retval;
    974 }
    975 //! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit
    976 
    977 //!
    978 int op_ushr_int_lit8() {
    979     u2 vA = INST_AA(inst);
    980     u2 vB = (u2)FETCH(1) & 0xff;
    981     s2 tmp = (s2)FETCH(1) >> 8;
    982     int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
    983     rPC += 2;
    984     return retval;
    985 }
    986 
    987 int isPowerOfTwo(int imm) {
    988     int i;
    989     for(i = 1; i < 17; i++) {
    990         if(imm == (1 << i)) return i;
    991     }
    992     return -1;
    993 }
    994 
    995 #define P_GPR_1 PhysicalReg_EBX
//! \brief Strength-reduce an integer divide by a literal power of two
//! (2, 4, 8, ...) into a shift/add sequence; active only in NCG -O1 mode.
//!
//! \param vA destination virtual register
//! \param vB dividend virtual register
//! \param imm literal divisor
//! \return 1 if code was emitted (caller skips the generic idiv path), 0 otherwise.
//!
//! The emitted sequence is the standard truncating signed division by 2^power:
//! a bias derived from the sign bits is added to the dividend before the
//! arithmetic right shift, so negative dividends round toward zero.
int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) {
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //strength reduction for div by 2,4,8,...
        int power = isPowerOfTwo(imm);
        if(power < 1) return 0; // not a reducible literal (also rejects 0, 1, negatives)
        //tmp2 is not updated, so it can share with vB
        get_virtual_reg(vB, OpndSize_32, 2, false);
        //if imm is 2, power will be 1
        if(power == 1) {
            /* mov tmp1, tmp2
               shrl $31, tmp1
               addl tmp2, tmp1
               sarl $1, tmp1 */
            move_reg_to_reg(OpndSize_32, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false); // tmp1 = sign bit (0 or 1)
            alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false);
            set_virtual_reg(vA, OpndSize_32, 1, false);
            return 1;
        }
        //power > 1
        /* mov tmp1, tmp2
           sarl $power-1, tmp1
           shrl 32-$power, tmp1
           addl tmp2, tmp1
           sarl $power, tmp1 */
        move_reg_to_reg(OpndSize_32, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false);
        // tmp1 is now 2^power-1 for negative dividends, 0 otherwise
        alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false);
        alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false);
        set_virtual_reg(vA, OpndSize_32, 1, false);
        return 1;
    }
    return 0;
}
   1032 
   1033 ////////// throws exception!!!
   1034 //! common code to handle integer DIV & REM with literal
   1035 
   1036 //! It uses GPR
//! \brief Emits code for integer DIV/REM with a literal divisor.
//!
//! \param isRem true for REM (result from EDX), false for DIV (result from EAX)
//! \param vA destination virtual register
//! \param vB dividend virtual register
//! \param imm literal divisor
//!
//! Special cases handled at compile time:
//! - DIV first tries the power-of-two strength reduction.
//! - imm == 0: emits an unconditional jump to the shared divide-by-zero
//!   exception stub (throws at runtime).
//! - imm == -1: guards the 0x80000000 / -1 overflow case, storing
//!   0x80000000 (DIV) or 0 (REM) without executing idiv.
int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) {
    if(!isRem) {
        int retCode = div_lit_strength_reduction(vA, vB, imm);
        if(retCode > 0) return 0; // reduced sequence already emitted
    }
    if(imm == 0) {
        export_pc(); //use %edx
#ifdef DEBUG_EXCEPTION
        LOGI("EXTRA code to handle exception");
#endif
        constVREndOfBB();
        beforeCall("exception"); //dump GG, GL VRs
        unconditional_jump_global_API(
                          "common_errDivideByZero", false);

        return 0;
    }
    // idiv requires the dividend in EAX (sign-extended into EDX by cdq)
    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    //check against -1 for DIV_INT??
    if(imm == -1) {
        // 0x80000000 / -1 would fault in idiv; branch to the special path
        compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
        conditional_jump(Condition_E, ".div_rem_int_lit_special", true);
        rememberState(1);
    }
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);

    if(imm == -1) {
        unconditional_jump(".div_rem_int_lit_okay", true);
        rememberState(2);
        insertLabel(".div_rem_int_lit_special", true);
        goToState(1);
        // overflow case: quotient wraps to INT_MIN, remainder is 0
        if(isRem)
            set_VR_to_imm(vA, OpndSize_32, 0);
        else
            set_VR_to_imm(vA, OpndSize_32, 0x80000000);
        transferToState(2);
    }

    insertLabel(".div_rem_int_lit_okay", true); //merge point 2
    return 0;
}
   1085 #undef P_GPR_1
   1086 //! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit
   1087 
   1088 //!
   1089 int op_div_int_lit16() {
   1090     u2 vA = INST_A(inst);
   1091     u2 vB = INST_B(inst);
   1092     s4 tmp = (s2)FETCH(1);
   1093     int retval = common_div_rem_int_lit(false, vA, vB, tmp);
   1094     rPC += 2;
   1095     return retval;
   1096 }
   1097 //! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit
   1098 
   1099 //!
   1100 int op_rem_int_lit16() {
   1101     u2 vA = INST_A(inst);
   1102     u2 vB = INST_B(inst);
   1103     s4 tmp = (s2)FETCH(1);
   1104     int retval = common_div_rem_int_lit(true, vA, vB, tmp);
   1105     rPC += 2;
   1106     return retval;
   1107 }
   1108 //! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit
   1109 
   1110 //!
   1111 int op_div_int_lit8() {
   1112     u2 vA = INST_AA(inst);
   1113     u2 vB = (u2)FETCH(1) & 0xff;
   1114     s2 tmp = (s2)FETCH(1) >> 8;
   1115     int retval = common_div_rem_int_lit(false, vA, vB, tmp);
   1116     rPC += 2;
   1117     return retval;
   1118 }
   1119 //! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit
   1120 
   1121 //!
   1122 int op_rem_int_lit8() {
   1123     u2 vA = INST_AA(inst);
   1124     u2 vB = (u2)FETCH(1) & 0xff;
   1125     s2 tmp = (s2)FETCH(1) >> 8;
   1126     int retval = common_div_rem_int_lit(true, vA, vB, tmp);
   1127     rPC += 2;
   1128     return retval;
   1129 }
    1130 //! common code to handle long ALU ops
   1131 
   1132 //! It uses XMM
//! \brief Emits code for a 64-bit binary ALU op: vA := v1 <opc> v2.
//!
//! Operates on XMM temporaries 1 and 2; suitable for add/sub/and/or/xor
//! but not for div/rem (those go through libgcc helpers).
int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    get_virtual_reg(v1, OpndSize_64, 1, false); // first operand -> temp 1
    get_virtual_reg(v2, OpndSize_64, 2, false); // second operand -> temp 2
    alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false); // temp1 = temp1 <opc> temp2
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
   1140 //! lower bytecode ADD_LONG by calling common_alu_long
   1141 
   1142 //!
   1143 int op_add_long() {
   1144     u2 vA = INST_AA(inst);
   1145     u2 v1 = *((u1*)rPC + 2);
   1146     u2 v2 = *((u1*)rPC + 3);
   1147     int retval = common_alu_long(add_opc, vA, v1, v2);
   1148     rPC += 2;
   1149     return retval;
   1150 }
   1151 //! lower bytecode SUB_LONG by calling common_alu_long
   1152 
   1153 //!
   1154 int op_sub_long() {
   1155     u2 vA = INST_AA(inst);
   1156     u2 v1 = *((u1*)rPC + 2);
   1157     u2 v2 = *((u1*)rPC + 3);
   1158     int retval = common_alu_long(sub_opc, vA, v1, v2);
   1159     rPC += 2;
   1160     return retval;
   1161 }
   1162 //! lower bytecode AND_LONG by calling common_alu_long
   1163 
   1164 //!
   1165 int op_and_long() {
   1166     u2 vA = INST_AA(inst);
   1167     u2 v1 = *((u1*)rPC + 2);
   1168     u2 v2 = *((u1*)rPC + 3);
   1169     int retval = common_alu_long(and_opc, vA, v1, v2);
   1170     rPC += 2;
   1171     return retval;
   1172 }
   1173 //! lower bytecode OR_LONG by calling common_alu_long
   1174 
   1175 //!
   1176 int op_or_long() {
   1177     u2 vA = INST_AA(inst);
   1178     u2 v1 = *((u1*)rPC + 2);
   1179     u2 v2 = *((u1*)rPC + 3);
   1180     int retval = common_alu_long(or_opc, vA, v1, v2);
   1181     rPC += 2;
   1182     return retval;
   1183 }
   1184 //! lower bytecode XOR_LONG by calling common_alu_long
   1185 
   1186 //!
   1187 int op_xor_long() {
   1188     u2 vA = INST_AA(inst);
   1189     u2 v1 = *((u1*)rPC + 2);
   1190     u2 v2 = *((u1*)rPC + 3);
   1191     int retval = common_alu_long(xor_opc, vA, v1, v2);
   1192     rPC += 2;
   1193     return retval;
   1194 }
   1195 //! lower bytecode ADD_LONG_2ADDR by calling common_alu_long
   1196 
   1197 //!
   1198 int op_add_long_2addr() {
   1199     u2 vA = INST_A(inst);
   1200     u2 v1 = vA;
   1201     u2 v2 = INST_B(inst);
   1202     int retval = common_alu_long(add_opc, vA, v1, v2);
   1203     rPC += 1;
   1204     return retval;
   1205 }
   1206 //! lower bytecode SUB_LONG_2ADDR by calling common_alu_long
   1207 
   1208 //!
   1209 int op_sub_long_2addr() {
   1210     u2 vA = INST_A(inst);
   1211     u2 v1 = vA;
   1212     u2 v2 = INST_B(inst);
   1213     int retval = common_alu_long(sub_opc, vA, v1, v2);
   1214     rPC += 1;
   1215     return retval;
   1216 }
   1217 //! lower bytecode AND_LONG_2ADDR by calling common_alu_long
   1218 
   1219 //!
   1220 int op_and_long_2addr() {
   1221     u2 vA = INST_A(inst);
   1222     u2 v1 = vA;
   1223     u2 v2 = INST_B(inst);
   1224     int retval = common_alu_long(and_opc, vA, v1, v2);
   1225     rPC += 1;
   1226     return retval;
   1227 }
   1228 //! lower bytecode OR_LONG_2ADDR by calling common_alu_long
   1229 
   1230 //!
   1231 int op_or_long_2addr() {
   1232     u2 vA = INST_A(inst);
   1233     u2 v1 = vA;
   1234     u2 v2 = INST_B(inst);
   1235     int retval = common_alu_long(or_opc, vA, v1, v2);
   1236     rPC += 1;
   1237     return retval;
   1238 }
   1239 //! lower bytecode XOR_LONG_2ADDR by calling common_alu_long
   1240 
   1241 //!
   1242 int op_xor_long_2addr() {
   1243     u2 vA = INST_A(inst);
   1244     u2 v1 = vA;
   1245     u2 v2 = INST_B(inst);
   1246     int retval = common_alu_long(xor_opc, vA, v1, v2);
   1247     rPC += 1;
   1248     return retval;
   1249 }
   1250 
   1251 //signed vs unsigned imul and mul?
   1252 #define P_GPR_1 PhysicalReg_EBX
   1253 #define P_GPR_2 PhysicalReg_ECX
   1254 #define P_GPR_3 PhysicalReg_ESI
   1255 //! common code to handle multiplication of long
   1256 
   1257 //! It uses GPR
//! \brief Emits code for 64-bit multiply via 32x32 partial products:
//! vA = ((v2.lo * v1.hi) + (v1.lo * v2.hi)) << 32 + widening(v1.lo * v2.lo).
//!
//! Uses GPR temporaries; the final widening mul leaves its 64-bit product
//! in EDX:EAX, whose high half is folded into the cross terms.
int common_mul_long(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);               // temp1 = v2.lo
    move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true); // EAX = v2.lo
    //imul: 2L * 1H update temporary 1
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false);
    get_virtual_reg(v1, OpndSize_32, 3, false);               // temp3 = v1.lo
    move_reg_to_reg(OpndSize_32, 3, false, 2, false);
    //imul: 1L * 2H
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false);
    alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); // temp1 = both cross terms
    alu_unary_reg(OpndSize_32, mul_opc, 3, false);            // EDX:EAX = v1.lo * v2.lo
    alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false);
    set_virtual_reg(vA+1, OpndSize_32, 1, false);             // high word of result
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);  // low word of result
    return 0;
}
   1274 #undef P_GPR_1
   1275 #undef P_GPR_2
   1276 #undef P_GPR_3
   1277 //! lower bytecode MUL_LONG by calling common_mul_long
   1278 
   1279 //!
   1280 int op_mul_long() {
   1281     u2 vA = INST_AA(inst);
   1282     u2 v1 = *((u1*)rPC + 2);
   1283     u2 v2 = *((u1*)rPC + 3);
   1284     int retval = common_mul_long(vA, v1, v2);
   1285     rPC += 2;
   1286     return retval;
   1287 }
   1288 //! lower bytecode MUL_LONG_2ADDR by calling common_mul_long
   1289 
   1290 //!
   1291 int op_mul_long_2addr() {
   1292     u2 vA = INST_A(inst);
   1293     u2 v1 = vA;
   1294     u2 v2 = INST_B(inst);
   1295     int retval = common_mul_long(vA, v1, v2);
   1296     rPC += 1;
   1297     return retval;
   1298 }
   1299 
   1300 #define P_GPR_1 PhysicalReg_EBX
   1301 #define P_GPR_2 PhysicalReg_ECX
   1302 //! common code to handle DIV & REM of long
   1303 
   1304 //! It uses GPR & XMM; and calls call_moddi3 & call_divdi3
//! \brief Emits code for DIV_LONG / REM_LONG via the libgcc helpers
//! __divdi3 / __moddi3.
//!
//! The 64-bit divisor is tested for zero by OR-ing its low and high words;
//! a zero divisor branches to the shared divide-by-zero exception handler.
//! Otherwise divisor and dividend are pushed on the native stack for the
//! call, and the 64-bit result returns in EDX:EAX.
int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);    // divisor low word
    get_virtual_reg(v2+1, OpndSize_32, 2, false);  // divisor high word
    //save to native stack before changing register P_GPR_1
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true);
    // low | high == 0 iff the whole 64-bit divisor is zero
    alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false);

    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true);
    get_virtual_reg(v1, OpndSize_64, 1, false);    // dividend
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs
    if(isRem)
        call_moddi3();
    else
        call_divdi3();
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    set_virtual_reg(vA+1, OpndSize_32,PhysicalReg_EDX, true); // result high word
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);  // result low word
    return 0;
}
   1330 #undef P_GPR_1
   1331 #undef P_GPR_2
   1332 //! lower bytecode DIV_LONG by calling common_div_rem_long
   1333 
   1334 //!
   1335 int op_div_long() {
   1336     u2 vA = INST_AA(inst);
   1337     u2 v1 = *((u1*)rPC + 2);
   1338     u2 v2 = *((u1*)rPC + 3);
   1339     int retval = common_div_rem_long(false, vA, v1, v2);
   1340     rPC += 2;
   1341     return retval;
   1342 }
   1343 //! lower bytecode REM_LONG by calling common_div_rem_long
   1344 
   1345 //!
   1346 int op_rem_long() {
   1347     u2 vA = INST_AA(inst);
   1348     u2 v1 = *((u1*)rPC + 2);
   1349     u2 v2 = *((u1*)rPC + 3);
   1350     int retval = common_div_rem_long(true, vA, v1, v2);
   1351     rPC += 2;
   1352     return retval;
   1353 }
   1354 //! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long
   1355 
   1356 //!
   1357 int op_div_long_2addr() {
   1358     u2 vA = INST_A(inst);
   1359     u2 v1 = vA;
   1360     u2 v2 = INST_B(inst);
   1361     int retval = common_div_rem_long(false, vA, v1, v2);
   1362     rPC += 1;
   1363     return retval;
   1364 }
   1365 //! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long
   1366 
   1367 //!
   1368 int op_rem_long_2addr() { //call __moddi3 instead of __divdi3
   1369     u2 vA = INST_A(inst);
   1370     u2 v1 = vA;
   1371     u2 v2 = INST_B(inst);
   1372     int retval = common_div_rem_long(true, vA, v1, v2);
   1373     rPC += 1;
   1374     return retval;
   1375 }
   1376 
   1377 //! common code to handle SHL long
   1378 
   1379 //! It uses XMM
//! \brief Emits code for SHL_LONG: vA(64) := v1 << (v2 & shiftMask).
//!
//! Uses XMM temporaries. The shift count is masked with the 64-bit
//! "shiftMask" constant before the 64-bit logical left shift.
int common_shl_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false); // shift count -> XMM temp 2

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); // mask count
    alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
   1391 
   1392 //! common code to handle SHR long
   1393 
   1394 //! It uses XMM
//! \brief Emits code for SHR_LONG (arithmetic 64-bit right shift).
//!
//! The XMM shift used here (srl) is logical, so for a negative v1 the
//! vacated high bits must be filled with ones afterwards: a mask is built
//! by shifting the "64bits" all-ones constant left by (64 - count) and
//! OR-ing it into the result. Non-negative inputs skip the fill via the
//! ".common_shr_long_special" branch.
int common_shr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false); // shift count -> XMM temp 2

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); // mask count
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); // logical shift
    // Check the sign of v1 via its high word; >= 0 means no sign fill needed.
    compare_imm_VR(OpndSize_32, 0, (v1+1));
    conditional_jump(Condition_GE, ".common_shr_long_special", true);
    rememberState(1);

    // temp4 = 64 - count (bits to pre-shift the all-ones pattern)
    load_global_data_API("value64", OpndSize_64, 4, false);

    alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false);

    load_global_data_API("64bits", OpndSize_64, 5, false);

    alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false);
    alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false); // fill sign bits
    rememberState(2);
    //check whether the target is next instruction TODO
    unconditional_jump(".common_shr_long_done", true);

    insertLabel(".common_shr_long_special", true);
    goToState(1);
    transferToState(2);
    insertLabel(".common_shr_long_done", true);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
   1426 
   1427 //! common code to handle USHR long
   1428 
   1429 //! It uses XMM
//! \brief Emits code for USHR_LONG (logical 64-bit right shift):
//! vA := v1 >>> (v2 & shiftMask).
//!
//! Uses XMM temporaries; no sign handling is needed for the unsigned form.
int common_ushr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_sd(v1, 1, false); // 64-bit value -> XMM temp 1
    get_VR_ss(v2, 2, false); // shift count -> XMM temp 2

    load_sd_global_data_API("shiftMask", 3, false);

    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); // mask count
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false);
    set_VR_sd(vA, 1, false);
    return 0;
}
   1441 //! lower bytecode SHL_LONG by calling common_shl_long
   1442 
   1443 //!
   1444 int op_shl_long() {
   1445     u2 vA = INST_AA(inst);
   1446     u2 v1 = *((u1*)rPC + 2);
   1447     u2 v2 = *((u1*)rPC + 3);
   1448     int retval = common_shl_long(vA, v1, v2);
   1449     rPC += 2;
   1450     return retval;
   1451 }
   1452 //! lower bytecode SHL_LONG_2ADDR by calling common_shl_long
   1453 
   1454 //!
   1455 int op_shl_long_2addr() {
   1456     u2 vA = INST_A(inst);
   1457     u2 v1 = vA;
   1458     u2 v2 = INST_B(inst);
   1459     int retval = common_shl_long(vA, v1, v2);
   1460     rPC += 1;
   1461     return retval;
   1462 }
   1463 //! lower bytecode SHR_LONG by calling common_shr_long
   1464 
   1465 //!
   1466 int op_shr_long() {
   1467     u2 vA = INST_AA(inst);
   1468     u2 v1 = *((u1*)rPC + 2);
   1469     u2 v2 = *((u1*)rPC + 3);
   1470     int retval = common_shr_long(vA, v1, v2);
   1471     rPC += 2;
   1472     return retval;
   1473 }
   1474 //! lower bytecode SHR_LONG_2ADDR by calling common_shr_long
   1475 
   1476 //!
   1477 int op_shr_long_2addr() {
   1478     u2 vA = INST_A(inst);
   1479     u2 v1 = vA;
   1480     u2 v2 = INST_B(inst);
   1481     int retval = common_shr_long(vA, v1, v2);
   1482     rPC += 1;
   1483     return retval;
   1484 }
   1485 //! lower bytecode USHR_LONG by calling common_ushr_long
   1486 
   1487 //!
   1488 int op_ushr_long() {
   1489     u2 vA = INST_AA(inst);
   1490     u2 v1 = *((u1*)rPC + 2);
   1491     u2 v2 = *((u1*)rPC + 3);
   1492     int retval = common_ushr_long(vA, v1, v2);
   1493     rPC += 2;
   1494     return retval;
   1495 }
   1496 //! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long
   1497 
   1498 //!
   1499 int op_ushr_long_2addr() {
   1500     u2 vA = INST_A(inst);
   1501     u2 v1 = vA;
   1502     u2 v2 = INST_B(inst);
   1503     int retval = common_ushr_long(vA, v1, v2);
   1504     rPC += 1;
   1505     return retval;
   1506 }
   1507 #define USE_MEM_OPERAND
   1508 ///////////////////////////////////////////
   1509 //! common code to handle ALU of floats
   1510 
   1511 //! It uses XMM
//! \brief Emits SSE code for a single-precision binary op: vA := v1 <opc> v2.
//!
//! With USE_MEM_OPERAND defined (the default above), the second operand is
//! used directly from the VR's memory slot; otherwise it is first loaded
//! into a second XMM temporary. Also used for DIV_FLOAT (divss), despite
//! the "add, sub, mul" note.
int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_ss(v1, 1, false); // first operand -> XMM temp 1
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/);
#else
    get_VR_ss(v2, 2, false);
    alu_ss_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_ss(vA, 1, false);
    return 0;
}
   1523 //! lower bytecode ADD_FLOAT by calling common_alu_float
   1524 
   1525 //!
   1526 int op_add_float() {
   1527     u2 vA = INST_AA(inst);
   1528     u2 v1 = *((u1*)rPC + 2);
   1529     u2 v2 = *((u1*)rPC + 3);
   1530     int retval = common_alu_float(add_opc, vA, v1, v2);
   1531     rPC += 2;
   1532     return retval;
   1533 }
   1534 //! lower bytecode SUB_FLOAT by calling common_alu_float
   1535 
   1536 //!
   1537 int op_sub_float() {
   1538     u2 vA = INST_AA(inst);
   1539     u2 v1 = *((u1*)rPC + 2);
   1540     u2 v2 = *((u1*)rPC + 3);
   1541     int retval = common_alu_float(sub_opc, vA, v1, v2);
   1542     rPC += 2;
   1543     return retval;
   1544 }
   1545 //! lower bytecode MUL_FLOAT by calling common_alu_float
   1546 
   1547 //!
   1548 int op_mul_float() {
   1549     u2 vA = INST_AA(inst);
   1550     u2 v1 = *((u1*)rPC + 2);
   1551     u2 v2 = *((u1*)rPC + 3);
   1552     int retval = common_alu_float(mul_opc, vA, v1, v2);
   1553     rPC += 2;
   1554     return retval;
   1555 }
   1556 //! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float
   1557 
   1558 //!
   1559 int op_add_float_2addr() {
   1560     u2 vA = INST_A(inst);
   1561     u2 v1 = vA;
   1562     u2 v2 = INST_B(inst);
   1563     int retval = common_alu_float(add_opc, vA, v1, v2);
   1564     rPC += 1;
   1565     return retval;
   1566 }
   1567 //! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float
   1568 
   1569 //!
   1570 int op_sub_float_2addr() {
   1571     u2 vA = INST_A(inst);
   1572     u2 v1 = vA;
   1573     u2 v2 = INST_B(inst);
   1574     int retval = common_alu_float(sub_opc, vA, v1, v2);
   1575     rPC += 1;
   1576     return retval;
   1577 }
   1578 //! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float
   1579 
   1580 //!
   1581 int op_mul_float_2addr() {
   1582     u2 vA = INST_A(inst);
   1583     u2 v1 = vA;
   1584     u2 v2 = INST_B(inst);
   1585     int retval = common_alu_float(mul_opc, vA, v1, v2);
   1586     rPC += 1;
   1587     return retval;
   1588 }
   1589 //! common code to handle DIV of float
   1590 
   1591 //! It uses FP stack
//! \brief Emits x87 FP-stack code for a float divide: vA := v1 / v2.
//!
//! NOTE(review): op_div_float / op_div_float_2addr below actually use
//! common_alu_float with div_opc; this helper is not called from this file.
int common_div_float(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_32, v1); //flds
    fpu_VR(div_opc, OpndSize_32, v2);  // fdivs against the VR memory slot
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
    1598 //! lower bytecode DIV_FLOAT by calling common_alu_float with div_opc
   1599 
   1600 //!
   1601 int op_div_float() {
   1602     u2 vA = INST_AA(inst);
   1603     u2 v1 = *((u1*)rPC + 2);
   1604     u2 v2 = *((u1*)rPC + 3);
   1605     int retval = common_alu_float(div_opc, vA, v1, v2);
   1606     rPC += 2;
   1607     return retval;
   1608 }
    1609 //! lower bytecode DIV_FLOAT_2ADDR by calling common_alu_float with div_opc
   1610 
   1611 //!
   1612 int op_div_float_2addr() {
   1613     u2 vA = INST_A(inst);
   1614     u2 v1 = vA;
   1615     u2 v2 = INST_B(inst);
   1616     int retval = common_alu_float(div_opc, vA, v1, v2);
   1617     rPC += 1;
   1618     return retval;
   1619 }
    1620 //! common code to handle ALU ops of double
   1621 
   1622 //! It uses XMM
//! \brief Emits SSE code for a double-precision binary op: vA := v1 <opc> v2.
//!
//! With USE_MEM_OPERAND defined, the second operand is used directly from
//! the VR's memory slot; otherwise it is first loaded into a second XMM
//! temporary. Also used for DIV_DOUBLE (divsd), despite the "add, sub, mul"
//! note.
int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_sd(v1, 1, false); // first operand -> XMM temp 1
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/);
#else
    get_VR_sd(v2, 2, false);
    alu_sd_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_sd(vA, 1, false);
    return 0;
}
   1634 //! lower bytecode ADD_DOUBLE by calling common_alu_double
   1635 
   1636 //!
   1637 int op_add_double() {
   1638     u2 vA = INST_AA(inst);
   1639     u2 v1 = *((u1*)rPC + 2);
   1640     u2 v2 = *((u1*)rPC + 3);
   1641     int retval = common_alu_double(add_opc, vA, v1, v2);
   1642     rPC += 2;
   1643     return retval;
   1644 }
   1645 //! lower bytecode SUB_DOUBLE by calling common_alu_double
   1646 
   1647 //!
   1648 int op_sub_double() {
   1649     u2 vA = INST_AA(inst);
   1650     u2 v1 = *((u1*)rPC + 2);
   1651     u2 v2 = *((u1*)rPC + 3);
   1652     int retval = common_alu_double(sub_opc, vA, v1, v2);
   1653     rPC += 2;
   1654     return retval;
   1655 }
   1656 //! lower bytecode MUL_DOUBLE by calling common_alu_double
   1657 
   1658 //!
   1659 int op_mul_double() {
   1660     u2 vA = INST_AA(inst);
   1661     u2 v1 = *((u1*)rPC + 2);
   1662     u2 v2 = *((u1*)rPC + 3);
   1663     int retval = common_alu_double(mul_opc, vA, v1, v2);
   1664     rPC += 2;
   1665     return retval;
   1666 }
   1667 //! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double
   1668 
   1669 //!
   1670 int op_add_double_2addr() {
   1671     u2 vA = INST_A(inst);
   1672     u2 v1 = vA;
   1673     u2 v2 = INST_B(inst);
   1674     int retval = common_alu_double(add_opc, vA, v1, v2);
   1675     rPC += 1;
   1676     return retval;
   1677 }
   1678 //! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double
   1679 
   1680 //!
   1681 int op_sub_double_2addr() {
   1682     u2 vA = INST_A(inst);
   1683     u2 v1 = vA;
   1684     u2 v2 = INST_B(inst);
   1685     int retval = common_alu_double(sub_opc, vA, v1, v2);
   1686     rPC += 1;
   1687     return retval;
   1688 }
   1689 //! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double
   1690 
   1691 //!
   1692 int op_mul_double_2addr() {
   1693     u2 vA = INST_A(inst);
   1694     u2 v1 = vA;
   1695     u2 v2 = INST_B(inst);
   1696     int retval = common_alu_double(mul_opc, vA, v1, v2);
   1697     rPC += 1;
   1698     return retval;
   1699 }
   1700 //! common code to handle DIV of double
   1701 
   1702 //! It uses FP stack
//! \brief Emits x87 FP-stack code for a double divide: vA := v1 / v2.
//!
//! NOTE(review): op_div_double / op_div_double_2addr below actually use
//! common_alu_double with div_opc; this helper is not called from this file.
int common_div_double(u2 vA, u2 v1, u2 v2) {
    load_fp_stack_VR(OpndSize_64, v1); //fldl
    fpu_VR(div_opc, OpndSize_64, v2); //fdivl
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
    1709 //! lower bytecode DIV_DOUBLE by calling common_alu_double with div_opc
   1710 
   1711 //!
   1712 int op_div_double() {
   1713     u2 vA = INST_AA(inst);
   1714     u2 v1 = *((u1*)rPC + 2);
   1715     u2 v2 = *((u1*)rPC + 3);
   1716     int retval = common_alu_double(div_opc, vA, v1, v2);
   1717     rPC += 2;
   1718     return retval;
   1719 }
    1720 //! lower bytecode DIV_DOUBLE_2ADDR by calling common_alu_double with div_opc
   1721 
   1722 //!
   1723 int op_div_double_2addr() {
   1724     u2 vA = INST_A(inst);
   1725     u2 v1 = vA;
   1726     u2 v2 = INST_B(inst);
   1727     int retval = common_alu_double(div_opc, vA, v1, v2);
   1728     rPC += 1;
   1729     return retval;
   1730 }
   1731 #define P_GPR_1 PhysicalReg_EBX
   1732 #define P_GPR_2 PhysicalReg_ECX
   1733 //! common code to handle REM of float
   1734 
   1735 //! It uses GPR & calls call_fmodf
//! \brief Emits code for REM_FLOAT by calling fmodf(v1, v2).
//!
//! Both float arguments are passed on the native stack (8 bytes total);
//! the result comes back on the x87 FP stack and is stored to vA (fstps).
int common_rem_float(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_32, 1, false); // x
    get_virtual_reg(v2, OpndSize_32, 2, false); // y
    load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmodf(); //(float x, float y) return float
    load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    return 0;
}
   1748 #undef P_GPR_1
   1749 #undef P_GPR_2
   1750 //! lower bytecode REM_FLOAT by calling common_rem_float
   1751 
   1752 //!
   1753 int op_rem_float() {
   1754     u2 vA = INST_AA(inst);
   1755     u2 v1 = *((u1*)rPC + 2);
   1756     u2 v2 = *((u1*)rPC + 3);
   1757     int retval = common_rem_float(vA, v1, v2);
   1758     rPC += 2;
   1759     return retval;
   1760 }
   1761 //! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float
   1762 
   1763 //!
   1764 int op_rem_float_2addr() {
   1765     u2 vA = INST_A(inst);
   1766     u2 v1 = vA;
   1767     u2 v2 = INST_B(inst);
   1768     int retval = common_rem_float(vA, v1, v2);
   1769     rPC += 1;
   1770     return retval;
   1771 }
   1772 //! common code to handle REM of double
   1773 
   1774 //! It uses XMM & calls call_fmod
//! \brief Emits code for REM_DOUBLE by calling fmod(v1, v2).
//!
//! Both double arguments are passed on the native stack (16 bytes total);
//! the result comes back on the x87 FP stack and is stored to vA (fstpl).
int common_rem_double(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_64, 1, false); // x
    get_virtual_reg(v2, OpndSize_64, 2, false); // y
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    call_fmod(); //(long double x, long double y) return double
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    return 0;
}
   1787 //! lower bytecode REM_DOUBLE by calling common_rem_double
   1788 
   1789 //!
   1790 int op_rem_double() {
   1791     u2 vA = INST_AA(inst);
   1792     u2 v1 = *((u1*)rPC + 2);
   1793     u2 v2 = *((u1*)rPC + 3);
   1794     int retval = common_rem_double(vA, v1, v2);
   1795     rPC += 2;
   1796     return retval;
   1797 }
   1798 //! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double
   1799 
   1800 //!
   1801 int op_rem_double_2addr() {
   1802     u2 vA = INST_A(inst);
   1803     u2 v1 = vA;
   1804     u2 v2 = INST_B(inst);
   1805     int retval = common_rem_double(vA, v1, v2);
   1806     rPC += 1;
   1807     return retval;
   1808 }
   1809 //! lower bytecode CMPL_FLOAT
   1810 
   1811 //!
int op_cmpl_float() {
    u2 vA = INST_AA(inst);   //destination VR for the -1/0/1 result
    u4 v1 = FETCH(1) & 0xff; //first source VR (low byte of code unit 1)
    u4 v2 = FETCH(1) >> 8;   //second source VR (high byte of code unit 1)
    get_VR_ss(v1, 1, false); //xmm
    //preload the possible outcomes: 0 (equal), 1 (greater), -1 (less)
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    compare_VR_ss_reg(v2, 1, false);
    //temp4 defaults to -1: CMPL biases toward -1 for NaN (the "less" result)
    move_imm_to_reg(OpndSize_32,
                                 0xffffffff, 4, false);
    //ORDER of cmov matters !!! (Z,P,A)
    //finalNaN: unordered 0xffffffff
    //ZF=1 also on unordered, so the P cmov below must come after Z to
    //override the 0 with -1 when a NaN is involved
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false);
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
   1836 //! lower bytecode CMPG_FLOAT
   1837 
   1838 //!
int op_cmpg_float() {
    u2 vA = INST_AA(inst);   //destination VR for the -1/0/1 result
    u4 v1 = FETCH(1) & 0xff; //first source VR (low byte of code unit 1)
    u4 v2 = FETCH(1) >> 8;   //second source VR (high byte of code unit 1)
    get_VR_ss(v1, 1, false);
    compare_VR_ss_reg(v2, 1, false);
    //possible outcomes: 0 (equal, temp1), 1 (greater or NaN, temp2)
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    //temp3 defaults to -1 (the "less" result); cmov order below (Z,P,A) matters
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                1, false, 3, false);
    //finalNaN: unordered -> CMPG biases toward +1, overriding the Z result
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                2, false, 3, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                2, false, 3, false);
    set_virtual_reg(vA, OpndSize_32, 3, false);
    rPC += 2;
    return 0;
}
   1860 //! lower bytecode CMPL_DOUBLE
   1861 
   1862 //!
int op_cmpl_double() {
    u2 vA = INST_AA(inst);   //destination VR for the -1/0/1 result
    u4 v1 = FETCH(1) & 0xff; //first source VR pair (low byte of code unit 1)
    u4 v2 = FETCH(1) >> 8;   //second source VR pair (high byte of code unit 1)
    get_VR_sd(v1, 1, false);
    compare_VR_sd_reg(v2, 1, false);
    //preload the possible outcomes: 0 (equal), 1 (greater), -1 (less)
    move_imm_to_reg(OpndSize_32, 0, 1, false);
    move_imm_to_reg(OpndSize_32, 1, 2, false);
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);

    //temp4 defaults to -1: CMPL biases toward -1 for NaN (the "less" result)
    //cmov order below (Z,P,A) matters: P must follow Z so unordered
    //overrides the 0 written for ZF=1
    move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
                                             1, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_P,
                                             3, false, 4, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A,
                                             2, false, 4, false);
    set_virtual_reg(vA, OpndSize_32, 4, false);
    rPC += 2;
    return 0;
}
   1885 //! lower bytecode CMPG_DOUBLE
   1886 
   1887 //!
   1888 int op_cmpg_double() {
   1889     u2 vA = INST_AA(inst);
   1890     u4 v1 = FETCH(1) & 0xff;
   1891     u4 v2 = FETCH(1) >> 8;
   1892     get_VR_sd(v1, 1, false);
   1893     compare_VR_sd_reg(v2, 1, false);
   1894     move_imm_to_reg(OpndSize_32, 0, 1, false);
   1895     move_imm_to_reg(OpndSize_32, 1, 2, false);
   1896 
   1897     //default: 0xffffffff??
   1898     move_imm_to_reg(OpndSize_32,
   1899                                  0xffffffff, 3, false);
   1900     conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
   1901                                              1, false, 3, false);
   1902     //finalNaN: unordered
   1903     conditional_move_reg_to_reg(OpndSize_32, Condition_P,
   1904                                              2, false, 3, false);
   1905     conditional_move_reg_to_reg(OpndSize_32, Condition_A,
   1906                                              2, false, 3, false);
   1907    set_virtual_reg(vA, OpndSize_32, 3, false);
   1908     rPC += 2;
   1909     return 0;
   1910 }
   1911 #define P_GPR_1 PhysicalReg_EBX
   1912 #define P_GPR_2 PhysicalReg_ECX
   1913 #define P_GPR_3 PhysicalReg_ESI
   1914 #define P_SCRATCH_1 PhysicalReg_EDX
   1915 #define P_SCRATCH_2 PhysicalReg_EAX
   1916 #define OPTION_OLD //for simpler cfg
   1917 //! lower bytecode CMP_LONG
   1918 
   1919 //!
int op_cmp_long() {
    u2 vA = INST_AA(inst);   //destination VR for the -1/0/1 result
    u4 v1 = FETCH(1) & 0xff; //first source VR pair (64-bit value)
    u4 v2 = FETCH(1) >> 8;   //second source VR pair (64-bit value)
    //load the high 32 bits of v1; high halves are compared first (signed)
    get_virtual_reg(v1+1, OpndSize_32, 2, false);
#ifdef OPTION_OLD
    //preload the three possible results: -1 (temp3), 1 (temp4), 0 (temp5)
    move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
    move_imm_to_reg(OpndSize_32, 1, 4, false);
    move_imm_to_reg(OpndSize_32, 0, 5, false);
#endif
    compare_VR_reg(OpndSize_32,
                                v2+1, 2, false);
#ifndef OPTION_OLD
    conditional_jump(Condition_L, ".cmp_long_less", true);
    conditional_jump(Condition_G, ".cmp_long_greater", true);
#else
    //high halves equal: fall through to the low-half compare below
    conditional_jump(Condition_E, ".cmp_long_equal", true);
    rememberState(1);
    //high halves differ: signed conditions pick the result into temp6
    conditional_move_reg_to_reg(OpndSize_32, Condition_L, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_G, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    rememberState(2);
    unconditional_jump(".cmp_long_okay", true);
    insertLabel(".cmp_long_equal", true);
    goToState(1);
#endif

    //compare the low 32-bit halves; note the conditions used are the
    //unsigned ones (B/A), since the low halves compare as unsigned
    get_virtual_reg(v1, OpndSize_32, 1, false);
    compare_VR_reg(OpndSize_32,
                                v2, 1, false);
#ifdef OPTION_OLD
    conditional_move_reg_to_reg(OpndSize_32, Condition_E,
                                             5, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_B, //below vs less
                                             3, false, 6, false);
    conditional_move_reg_to_reg(OpndSize_32, Condition_A, //above vs greater
                                             4, false, 6, false);
    set_virtual_reg(vA, OpndSize_32, 6, false);
    transferToState(2);
#else
    conditional_jump(Condition_A, ".cmp_long_greater", true);
    conditional_jump(Condition_NE, ".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_less", true);
    set_VR_to_imm(vA, OpndSize_32, 0xffffffff);
    unconditional_jump(".cmp_long_okay", true);

    insertLabel(".cmp_long_greater", true);
    set_VR_to_imm(vA, OpndSize_32, 1);
#endif
    insertLabel(".cmp_long_okay", true);
    rPC += 2;
    return 0;
}
   1978 #undef P_GPR_1
   1979 #undef P_GPR_2
   1980 #undef P_GPR_3
   1981