/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/*! \file LowerAlu.cpp
    \brief This file lowers ALU bytecodes.
*/
#include "libdex/DexOpcodes.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "enc_wrapper.h"

/////////////////////////////////////////////
#define P_GPR_1 PhysicalReg_EBX
//! lower bytecode NEG_INT

//!
int op_neg_int() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_unary_reg(OpndSize_32, neg_opc, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
//! lower bytecode NOT_INT

//!
int op_not_int() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_unary_reg(OpndSize_32, not_opc, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
#undef P_GPR_1
//! lower bytecode NEG_LONG

//! This implementation uses XMM registers
int op_neg_long() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    rPC += 1;
    return 0;
}
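//Note: the XOR zeroes temporary 2 and the SUB then computes 0 - vB, the
//standard two-instruction negation for XMM registers, which lack a 64-bit
//NEG. For example, vB = 1 yields 0 - 1 = 0xFFFFFFFFFFFFFFFF = -1L.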
//! lower bytecode NOT_LONG

//! This implementation uses XMM registers
int op_not_long() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_64, 1, false);
    load_global_data_API("64bits", OpndSize_64, 2, false);
    alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    rPC += 1;
    return 0;
}
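//Note: PANDN computes NOT(dst) AND src, so ANDN-ing the loaded value
//against the "64bits" constant (presumably an all-ones 64-bit mask, judging
//by its use here and in common_shr_long) leaves NOT(value) in temporary 1.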
#define P_GPR_1 PhysicalReg_EBX
//! lower bytecode NEG_FLOAT

//! This implementation uses GPR
int op_neg_float() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
#undef P_GPR_1
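//Note: an IEEE-754 single is negated by flipping bit 31 alone. Adding
//0x80000000 is equivalent to XORing it, since the carry out of bit 31 is
//discarded. For example, 1.0f = 0x3F800000 becomes 0xBF800000 = -1.0f.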

//! lower bytecode NEG_DOUBLE

//! This implementation uses XMM registers
int op_neg_double() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_64, 1, false);
    load_global_data_API("doubNeg", OpndSize_64, 2, false);
    alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    rPC += 1;
    return 0;
}

//! lower bytecode INT_TO_LONG

//! It uses native instruction cdq
int op_int_to_long() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    convert_integer(OpndSize_32, OpndSize_64);
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true);
    rPC += 1;
    return 0;
}
//! lower bytecode INT_TO_FLOAT

//! This implementation uses FP stack
int op_int_to_float() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    rPC += 1;
    return 0;
}
//! lower bytecode INT_TO_DOUBLE

//! This implementation uses FP stack
int op_int_to_double() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_int_fp_stack_VR(OpndSize_32, vB); //fildl
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    rPC += 1;
    return 0;
}
//! lower bytecode LONG_TO_FLOAT

//! This implementation uses FP stack
int op_long_to_float() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    rPC += 1;
    return 0;
}
//! lower bytecode LONG_TO_DOUBLE

//! This implementation uses FP stack
int op_long_to_double() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_int_fp_stack_VR(OpndSize_64, vB); //fildll
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    rPC += 1;
    return 0;
}
//! lower bytecode FLOAT_TO_DOUBLE

//! This implementation uses FP stack
int op_float_to_double() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_fp_stack_VR(OpndSize_32, vB); //flds
    store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
    rPC += 1;
    return 0;
}
//! lower bytecode DOUBLE_TO_FLOAT

//! This implementation uses FP stack
int op_double_to_float() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    load_fp_stack_VR(OpndSize_64, vB); //fldl
    store_fp_stack_VR(true, OpndSize_32, vA); //fstps
    rPC += 1;
    return 0;
}
#define P_GPR_1 PhysicalReg_EBX
//! lower bytecode LONG_TO_INT

//! This implementation uses GPR
int op_long_to_int() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
#undef P_GPR_1

//! common code to convert a float or double to integer

//! It uses FP stack
int common_fp_to_int(bool isDouble, u2 vA, u2 vB) {
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    load_fp_stack_global_data_API("intMax", OpndSize_32);
    load_fp_stack_global_data_API("intMin", OpndSize_32);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2)
    //ST(0) ST(1) --> LintMax value
    conditional_jump(Condition_AE, ".float_to_int_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1)
    //ST(0) --> value
    rememberState(2);
    conditional_jump(Condition_C, ".float_to_int_nanInf", true);
    //fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl
    //fldcw
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_nanInf", true);
    conditional_jump(Condition_NP, ".float_to_int_posInf", true);
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_posInf", true);
    //fstps CHECK
    goToState(2);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x7fffffff);
    transferToState(3);
    unconditional_jump(".float_to_int_okay", true);
    insertLabel(".float_to_int_negInf", true);
    goToState(1);
    //fstps CHECK
    store_fp_stack_VR(true, OpndSize_32, vA);
    store_fp_stack_VR(true, OpndSize_32, vA);
    set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(3);
    insertLabel(".float_to_int_okay", true);
    return 0;
}
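//Note: the paths above implement Java's narrowing rules: values at or below
//the "intMin" constant store 0x80000000 (Integer.MIN_VALUE), values above
//"intMax" store 0x7fffffff (Integer.MAX_VALUE), NaN (detected via the parity
//flag) stores 0, and everything else falls through to fistpl. The control
//word sequence forces truncation on that common path: OR-ing 0xc00 sets the
//x87 rounding-control field (bits 11:10) to round-toward-zero, and the XOR
//afterwards restores it, assuming the default round-to-nearest (RC = 00)
//on entry.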
//! lower bytecode FLOAT_TO_INT by calling common_fp_to_int

//!
int op_float_to_int() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    int retval = common_fp_to_int(false, vA, vB);
    rPC += 1;
    return retval;
}
//! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int

//!
int op_double_to_int() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    int retval = common_fp_to_int(true, vA, vB);
    rPC += 1;
    return retval;
}

//! common code to convert float or double to long

//! It uses FP stack
int common_fp_to_long(bool isDouble, u2 vA, u2 vB) {
    if(isDouble) {
        load_fp_stack_VR(OpndSize_64, vB); //fldl
    }
    else {
        load_fp_stack_VR(OpndSize_32, vB); //flds
    }

    load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64);
    load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64);

    //ST(0) ST(1) ST(2) --> LintMin LintMax value
    compare_fp_stack(true, 2, false/*isDouble*/); //ST(2)
    //ST(0) ST(1) --> LintMax value
    conditional_jump(Condition_AE, ".float_to_long_negInf", true);
    rememberState(1);
    compare_fp_stack(true, 1, false/*isDouble*/); //ST(1)
    rememberState(2);
    //ST(0) --> value
    conditional_jump(Condition_C, ".float_to_long_nanInf", true);
    //fnstcw, orw, fldcw, xorw
    load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true);
    load_fpu_cw(0, PhysicalReg_ESP, true);
    alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true);
    store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll
    //fldcw
    load_fpu_cw(0, PhysicalReg_ESP, true);
    load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    rememberState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_nanInf", true);
    conditional_jump(Condition_NP, ".float_to_long_posInf", true);
    //fstpl??
    goToState(2);

    load_global_data_API("valueNanLong", OpndSize_64, 1, false);

    set_virtual_reg(vA, OpndSize_64, 1, false);
    transferToState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_posInf", true);
    //fstpl
    goToState(2);

    load_global_data_API("valuePosInfLong", OpndSize_64, 2, false);
    set_virtual_reg(vA, OpndSize_64, 2, false);
    transferToState(3);
    unconditional_jump(".float_to_long_okay", true);
    insertLabel(".float_to_long_negInf", true);
    //fstpl
    //fstpl
    goToState(1);

    load_global_data_API("valueNegInfLong", OpndSize_64, 3, false);
    set_virtual_reg(vA, OpndSize_64, 3, false);
    transferToState(3);
    insertLabel(".float_to_long_okay", true);
    return 0;
}
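//Note: same structure as common_fp_to_int, but clamping against the 64-bit
//"valuePosInfLong"/"valueNegInfLong" constants (Long.MAX_VALUE and
//Long.MIN_VALUE) and storing NaN as 0L; fistpll writes the full 64-bit
//result in one instruction, so no register pair is needed.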
//! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long

//!
int op_float_to_long() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    int retval = common_fp_to_long(false, vA, vB);
    rPC += 1;
    return retval;
}
//! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long

//!
int op_double_to_long() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    int retval = common_fp_to_long(true, vA, vB);
    rPC += 1;
    return retval;
}
#define P_GPR_1 PhysicalReg_EBX
//! lower bytecode INT_TO_BYTE

//! It uses GPR
int op_int_to_byte() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false);
    alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
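//Note: the shift pair sign-extends the low byte: sal 24 moves the byte's
//sign bit to bit 31 and sar 24 replicates it back down. For example,
//vB = 0x000000FF becomes 0xFF000000 after sal and 0xFFFFFFFF (-1) after
//sar. INT_TO_CHAR below uses shr instead of sar because char is Java's
//only unsigned type and must zero-extend.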
//! lower bytecode INT_TO_CHAR

//! It uses GPR
int op_int_to_char() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
    alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
//! lower bytecode INT_TO_SHORT

//! It uses GPR
int op_int_to_short() {
    u2 vA = INST_A(inst); //destination
    u2 vB = INST_B(inst);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false);
    alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    rPC += 1;
    return 0;
}
//! common code to handle integer ALU ops

//! It uses GPR
int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    get_virtual_reg(v1, OpndSize_32, 1, false);
    //in encoder, reg is first operand, which is the destination
    //gpr_1 op v2(rFP) --> gpr_1
    alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
#undef P_GPR_1
#define P_GPR_1 PhysicalReg_EBX
//! common code to handle integer shift ops

//! It uses GPR
int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true);
    get_virtual_reg(v1, OpndSize_32, 1, false);
    //in encoder, reg2 is first operand, which is the destination
    //gpr_1 op %ecx --> gpr_1
    //variable shifts only work with reg cl, so v2 is staged in %ecx
    alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
#undef P_GPR_1
//! lower bytecode ADD_INT by calling common_alu_int

//!
int op_add_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(add_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
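//Decoding note: ADD_INT and the other binop handlers below use the Dalvik
//23x format "AA|op CC|BB": the destination comes from INST_AA, and the two
//source registers BB and CC are the third and fourth bytes of the
//instruction, read directly off rPC.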
//! lower bytecode SUB_INT by calling common_alu_int

//!
int op_sub_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(sub_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode MUL_INT by calling common_alu_int

//!
int op_mul_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(imul_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode AND_INT by calling common_alu_int

//!
int op_and_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(and_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode OR_INT by calling common_alu_int

//!
int op_or_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(or_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode XOR_INT by calling common_alu_int

//!
int op_xor_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_alu_int(xor_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SHL_INT by calling common_shift_int

//!
int op_shl_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_shift_int(shl_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SHR_INT by calling common_shift_int

//!
int op_shr_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_shift_int(sar_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode USHR_INT by calling common_shift_int

//!
int op_ushr_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_shift_int(shr_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode ADD_INT_2ADDR by calling common_alu_int

//!
int op_add_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(add_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SUB_INT_2ADDR by calling common_alu_int

//!
int op_sub_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(sub_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode MUL_INT_2ADDR by calling common_alu_int

//!
int op_mul_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(imul_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode AND_INT_2ADDR by calling common_alu_int

//!
int op_and_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(and_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode OR_INT_2ADDR by calling common_alu_int

//!
int op_or_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(or_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode XOR_INT_2ADDR by calling common_alu_int

//!
int op_xor_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_alu_int(xor_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SHL_INT_2ADDR by calling common_shift_int

//!
int op_shl_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_shift_int(shl_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SHR_INT_2ADDR by calling common_shift_int

//!
int op_shr_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_shift_int(sar_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode USHR_INT_2ADDR by calling common_shift_int

//!
int op_ushr_int_2addr() {
    u2 vA, v1, v2;
    vA = INST_A(inst);
    v1 = vA;
    v2 = INST_B(inst);
    int retval = common_shift_int(shr_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
#define P_GPR_1 PhysicalReg_EBX
//! common code to handle integer DIV & REM, it uses GPR

//! The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv
//! There are two merge points in the control flow for this bytecode
//! make sure the reg. alloc. state is the same at merge points by calling transferToState
int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true);
    get_virtual_reg(v2, OpndSize_32, 2, false);
    compare_imm_reg(OpndSize_32, 0, 2, false);
    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    /////////////////// handle special cases
    //conditional move 0 to $edx for rem for the two special cases
    //conditional move 0x80000000 to $eax for div
    //handle -1 special case divide error
    compare_imm_reg(OpndSize_32, -1, 2, false);
    conditional_jump(Condition_NE, ".common_div_rem_int_normal", true);
    //handle min int special case divide error
    rememberState(1);
    compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
    transferToState(1);
    conditional_jump(Condition_E, ".common_div_rem_int_special", true);

    insertLabel(".common_div_rem_int_normal", true); //merge point
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else //divide: quotient in %eax
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    rememberState(2);
    unconditional_jump(".common_div_rem_int_okay", true);

    insertLabel(".common_div_rem_int_special", true);
    goToState(1);
    if(isRem)
        set_VR_to_imm(vA, OpndSize_32, 0);
    else
        set_VR_to_imm(vA, OpndSize_32, 0x80000000);
    transferToState(2);
    insertLabel(".common_div_rem_int_okay", true); //merge point 2
    return 0;
}
#undef P_GPR_1
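//Note: the special case exists because x86 idiv raises a divide-error fault
//(#DE) when the quotient overflows, which happens exactly for
//0x80000000 / -1. Java instead defines Integer.MIN_VALUE / -1 ==
//Integer.MIN_VALUE and Integer.MIN_VALUE % -1 == 0, so that operand pair is
//filtered out before idiv and handled with immediate stores.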
//! lower bytecode DIV_INT by calling common_div_rem_int

//!
int op_div_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_div_rem_int(false, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode REM_INT by calling common_div_rem_int

//!
int op_rem_int() {
    u2 vA, v1, v2;
    vA = INST_AA(inst);
    v1 = *((u1*)rPC + 2);
    v2 = *((u1*)rPC + 3);
    int retval = common_div_rem_int(true, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int

//!
int op_div_int_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_div_rem_int(false, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode REM_INT_2ADDR by calling common_div_rem_int

//!
int op_rem_int_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_div_rem_int(true, vA, v1, v2);
    rPC += 1;
    return retval;
}

#define P_GPR_1 PhysicalReg_EBX
//! common code to handle integer ALU ops with literal

//! It uses GPR
int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false);
    set_virtual_reg(vA, OpndSize_32, 1, false);
    return 0;
}
//! calls common_alu_int_lit
int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) {
    return common_alu_int_lit(opc, vA, vB, imm);
}
#undef P_GPR_1
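//Note: shift-by-literal can share the ALU-literal path because x86 encodes
//an immediate shift count directly (shl/sar/shr reg, imm8); only the
//variable-count shifts in common_shift_int need the count staged in %cl.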
//! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit

//!
int op_add_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}

int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) {
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    get_virtual_reg(vB, OpndSize_32, 1, false);
    alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false);
    set_virtual_reg(vA, OpndSize_32, 2, false);
    return 0;
}


//! lower bytecode RSUB_INT by calling alu_rsub_int

//!
int op_rsub_int() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
    rPC += 2;
    return retval;
}
//! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit

//!
int op_mul_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit

//!
int op_and_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit

//!
int op_or_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit

//!
int op_xor_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit

//!
int op_shl_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit

//!
int op_shr_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit

//!
int op_ushr_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit

//!
int op_add_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_alu_int_lit(add_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
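//Decoding note: the lit8 handlers use the Dalvik 22b format "AA|op CC|BB":
//FETCH(1) holds the signed literal CC in its high byte and the source
//register BB in its low byte, so "& 0xff" extracts vB and the arithmetic
//">> 8" of the s2 value recovers the sign-extended literal.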
//! lower bytecode RSUB_INT_LIT8 by calling alu_rsub_int

//!
int op_rsub_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = alu_rsub_int(sub_opc, vA, tmp, vB);
    rPC += 2;
    return retval;
}
//! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit

//!
int op_mul_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_alu_int_lit(imul_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit

//!
int op_and_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_alu_int_lit(and_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit

//!
int op_or_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_alu_int_lit(or_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit

//!
int op_xor_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_alu_int_lit(xor_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit

//!
int op_shl_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_shift_int_lit(shl_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit

//!
int op_shr_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_shift_int_lit(sar_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit

//!
int op_ushr_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_shift_int_lit(shr_opc, vA, vB, tmp);
    rPC += 2;
    return retval;
}

int isPowerOfTwo(int imm) {
    int i;
    for(i = 1; i < 17; i++) {
        if(imm == (1 << i)) return i;
    }
    return -1;
}

#define P_GPR_1 PhysicalReg_EBX
int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) {
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //strength reduction for div by 2,4,8,...
        int power = isPowerOfTwo(imm);
        if(power < 1) return 0;
        //tmp2 is not updated, so it can share with vB
        get_virtual_reg(vB, OpndSize_32, 2, false);
        //if imm is 2, power will be 1
        if(power == 1) {
            /* mov tmp1, tmp2
               shrl $31, tmp1
               addl tmp2, tmp1
               sarl $1, tmp1 */
            move_reg_to_reg(OpndSize_32, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false);
            alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
            alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false);
            set_virtual_reg(vA, OpndSize_32, 1, false);
            return 1;
        }
        //power > 1
        /* mov tmp1, tmp2
           sarl $power-1, tmp1
           shrl 32-$power, tmp1
           addl tmp2, tmp1
           sarl $power, tmp1 */
        move_reg_to_reg(OpndSize_32, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false);
        alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false);
        alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
        alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false);
        set_virtual_reg(vA, OpndSize_32, 1, false);
        return 1;
    }
    return 0;
}
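//Worked example of the power > 1 sequence for imm = 8 (power = 3), vB = -9:
//  sarl $2  -> tmp1 = -9 >> 2 = -3 (sign bits fill the top)
//  shrl $29 -> tmp1 = 7, i.e. 2^power - 1 for negative inputs, 0 otherwise
//  addl     -> tmp1 = -9 + 7 = -2
//  sarl $3  -> tmp1 = -1, matching Java's truncated -9 / 8
//The bias makes the arithmetic shift round toward zero rather than toward
//negative infinity; non-negative inputs get a zero bias and divide directly.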

////////// throws exception!!!
//! common code to handle integer DIV & REM with literal

//! It uses GPR
int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) {
    if(!isRem) {
        int retCode = div_lit_strength_reduction(vA, vB, imm);
        if(retCode > 0) return 0;
    }
    if(imm == 0) {
        export_pc(); //use %edx
#ifdef DEBUG_EXCEPTION
        LOGI("EXTRA code to handle exception");
#endif
        constVREndOfBB();
        beforeCall("exception"); //dump GG, GL VRs
        unconditional_jump_global_API(
                          "common_errDivideByZero", false);

        return 0;
    }
    get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true);
    //check against -1 for DIV_INT??
    if(imm == -1) {
        compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true);
        conditional_jump(Condition_E, ".div_rem_int_lit_special", true);
        rememberState(1);
    }
    move_imm_to_reg(OpndSize_32, imm, 2, false);
    convert_integer(OpndSize_32, OpndSize_64); //cdq
    //idiv: dividend in edx:eax; quotient in eax; remainder in edx
    alu_unary_reg(OpndSize_32, idiv_opc, 2, false);
    if(isRem)
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true);
    else
        set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);

    if(imm == -1) {
        unconditional_jump(".div_rem_int_lit_okay", true);
        rememberState(2);
        insertLabel(".div_rem_int_lit_special", true);
        goToState(1);
        if(isRem)
            set_VR_to_imm(vA, OpndSize_32, 0);
        else
            set_VR_to_imm(vA, OpndSize_32, 0x80000000);
        transferToState(2);
    }

    insertLabel(".div_rem_int_lit_okay", true); //merge point 2
    return 0;
}
#undef P_GPR_1
//! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit

//!
int op_div_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit

//!
int op_rem_int_lit16() {
    u2 vA = INST_A(inst);
    u2 vB = INST_B(inst);
    s4 tmp = (s2)FETCH(1);
    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit

//!
int op_div_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_div_rem_int_lit(false, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit

//!
int op_rem_int_lit8() {
    u2 vA = INST_AA(inst);
    u2 vB = (u2)FETCH(1) & 0xff;
    s2 tmp = (s2)FETCH(1) >> 8;
    int retval = common_div_rem_int_lit(true, vA, vB, tmp);
    rPC += 2;
    return retval;
}
//! common code to handle long ALU ops

//! It uses XMM
int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem
    get_virtual_reg(v1, OpndSize_64, 1, false);
    get_virtual_reg(v2, OpndSize_64, 2, false);
    alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
//! lower bytecode ADD_LONG by calling common_alu_long

//!
int op_add_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_long(add_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SUB_LONG by calling common_alu_long

//!
int op_sub_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_long(sub_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode AND_LONG by calling common_alu_long

//!
int op_and_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_long(and_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode OR_LONG by calling common_alu_long

//!
int op_or_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_long(or_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode XOR_LONG by calling common_alu_long

//!
int op_xor_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_long(xor_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode ADD_LONG_2ADDR by calling common_alu_long

//!
int op_add_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_long(add_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SUB_LONG_2ADDR by calling common_alu_long

//!
int op_sub_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_long(sub_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode AND_LONG_2ADDR by calling common_alu_long

//!
int op_and_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_long(and_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode OR_LONG_2ADDR by calling common_alu_long

//!
int op_or_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_long(or_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode XOR_LONG_2ADDR by calling common_alu_long

//!
int op_xor_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_long(xor_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}

//signed vs unsigned imul and mul?
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
#define P_GPR_3 PhysicalReg_ESI
//! common code to handle multiplication of long

//! It uses GPR
int common_mul_long(u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);
    move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true);
    //imul: 2L * 1H update temporary 1
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false);
    get_virtual_reg(v1, OpndSize_32, 3, false);
    move_reg_to_reg(OpndSize_32, 3, false, 2, false);
    //imul: 1L * 2H
    alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false);
    alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false);
    alu_unary_reg(OpndSize_32, mul_opc, 3, false);
    alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false);
    set_virtual_reg(vA+1, OpndSize_32, 1, false);
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    return 0;
}
#undef P_GPR_1
#undef P_GPR_2
#undef P_GPR_3
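//Note: writing v1 = (aH:aL) and v2 = (bH:bL), the sequence computes
//  low  = low32(aL * bL)                      (widening mul, in EAX)
//  high = aL*bH + aH*bL + high32(aL * bL)     (two imuls + EDX, mod 2^32)
//which is (v1 * v2) mod 2^64. The cross terms may use signed imul because
//the low 32 bits of a product are the same for signed and unsigned
//multiplication; only the aL*bL term needs the widening unsigned mul.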
//! lower bytecode MUL_LONG by calling common_mul_long

//!
int op_mul_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_mul_long(vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode MUL_LONG_2ADDR by calling common_mul_long

//!
int op_mul_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_mul_long(vA, v1, v2);
    rPC += 1;
    return retval;
}

#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
//! common code to handle DIV & REM of long

//! It uses GPR & XMM; and calls call_moddi3 & call_divdi3
int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) {
    get_virtual_reg(v2, OpndSize_32, 1, false);
    get_virtual_reg(v2+1, OpndSize_32, 2, false);
    //save to native stack before changing register P_GPR_1
    load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true);
    alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false);

    handlePotentialException(
                                       Condition_E, Condition_NE,
                                       1, "common_errDivideByZero");
    move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true);
    get_virtual_reg(v1, OpndSize_64, 1, false);
    move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
    scratchRegs[0] = PhysicalReg_SCRATCH_1;
    nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs
    if(isRem)
        call_moddi3();
    else
        call_divdi3();
    load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
    set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true);
    set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true);
    return 0;
}
#undef P_GPR_1
#undef P_GPR_2
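//Note: ORing the low and high words of the divisor lets one flag test cover
//the whole 64-bit zero check; the division itself is delegated to the
//runtime helpers __divdi3 / __moddi3 with both operands passed on the
//native stack (hence the 16-byte frame), returning the result in EDX:EAX.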
//! lower bytecode DIV_LONG by calling common_div_rem_long

//!
int op_div_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_div_rem_long(false, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode REM_LONG by calling common_div_rem_long

//!
int op_rem_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_div_rem_long(true, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long

//!
int op_div_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_div_rem_long(false, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long

//!
int op_rem_long_2addr() { //call __moddi3 instead of __divdi3
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_div_rem_long(true, vA, v1, v2);
    rPC += 1;
    return retval;
}

//! common code to handle SHL long

//! It uses XMM
int common_shl_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false);

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}

//! common code to handle SHR long

//! It uses XMM
int common_shr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_ss(v2, 2, false);

    load_global_data_API("shiftMask", OpndSize_64, 3, false);

    get_virtual_reg(v1, OpndSize_64, 1, false);
    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false);
    compare_imm_VR(OpndSize_32, 0, (v1+1));
    conditional_jump(Condition_GE, ".common_shr_long_special", true);
    rememberState(1);

    load_global_data_API("value64", OpndSize_64, 4, false);

    alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false);

    load_global_data_API("64bits", OpndSize_64, 5, false);

    alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false);
    alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false);
    rememberState(2);
    //check whether the target is next instruction TODO
    unconditional_jump(".common_shr_long_done", true);

    insertLabel(".common_shr_long_special", true);
    goToState(1);
    transferToState(2);
    insertLabel(".common_shr_long_done", true);
    set_virtual_reg(vA, OpndSize_64, 1, false);
    return 0;
}
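//Note: SSE2 has no 64-bit arithmetic right shift, so the value is shifted
//logically (srl) and the sign bits are patched in by hand: when the high
//word of v1 is negative, an all-ones "64bits" mask is shifted left by
//64 - count (the "value64" constant minus the masked count, presumably) and
//ORed into the result; non-negative values take the special path, where the
//logical shift is already correct.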

//! common code to handle USHR long

//! It uses XMM
int common_ushr_long(u2 vA, u2 v1, u2 v2) {
    get_VR_sd(v1, 1, false);
    get_VR_ss(v2, 2, false);

    load_sd_global_data_API("shiftMask", 3, false);

    alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false);
    alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false);
    set_VR_sd(vA, 1, false);
    return 0;
}
//! lower bytecode SHL_LONG by calling common_shl_long

//!
int op_shl_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_shl_long(vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SHL_LONG_2ADDR by calling common_shl_long

//!
int op_shl_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_shl_long(vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SHR_LONG by calling common_shr_long

//!
int op_shr_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_shr_long(vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SHR_LONG_2ADDR by calling common_shr_long

//!
int op_shr_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_shr_long(vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode USHR_LONG by calling common_ushr_long

//!
int op_ushr_long() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_ushr_long(vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long

//!
int op_ushr_long_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_ushr_long(vA, v1, v2);
    rPC += 1;
    return retval;
}
#define USE_MEM_OPERAND
///////////////////////////////////////////
//! common code to handle ALU of floats

//! It uses XMM
int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul
    get_VR_ss(v1, 1, false);
#ifdef USE_MEM_OPERAND
    alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/);
#else
    get_VR_ss(v2, 2, false);
    alu_ss_binary_reg_reg(opc, 2, false, 1, false);
#endif
    set_VR_ss(vA, 1, false);
    return 0;
}
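//Note: with USE_MEM_OPERAND the second source operand stays in the
//virtual-register area and is folded into the SSE instruction as a memory
//operand (e.g. addss reg, mem), saving a scratch XMM register and a load;
//the #else branch shows the two-register alternative.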
//! lower bytecode ADD_FLOAT by calling common_alu_float

//!
int op_add_float() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_float(add_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode SUB_FLOAT by calling common_alu_float

//!
int op_sub_float() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_float(sub_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode MUL_FLOAT by calling common_alu_float

//!
int op_mul_float() {
    u2 vA = INST_AA(inst);
    u2 v1 = *((u1*)rPC + 2);
    u2 v2 = *((u1*)rPC + 3);
    int retval = common_alu_float(mul_opc, vA, v1, v2);
    rPC += 2;
    return retval;
}
//! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float

//!
int op_add_float_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_float(add_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float

//!
int op_sub_float_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_float(sub_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
//! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float

//!
int op_mul_float_2addr() {
    u2 vA = INST_A(inst);
    u2 v1 = vA;
    u2 v2 = INST_B(inst);
    int retval = common_alu_float(mul_opc, vA, v1, v2);
    rPC += 1;
    return retval;
}
   1571 //! common code to handle DIV of float
   1572 
   1573 //! It uses FP stack
   1574 int common_div_float(u2 vA, u2 v1, u2 v2) {
   1575     load_fp_stack_VR(OpndSize_32, v1); //flds
   1576     fpu_VR(div_opc, OpndSize_32, v2);
   1577     store_fp_stack_VR(true, OpndSize_32, vA); //fstps
   1578     return 0;
   1579 }
   1580 //! lower bytecode DIV_FLOAT by calling common_div_float
   1581 
   1582 //!
   1583 int op_div_float() {
   1584     u2 vA = INST_AA(inst);
   1585     u2 v1 = *((u1*)rPC + 2);
   1586     u2 v2 = *((u1*)rPC + 3);
   1587     int retval = common_alu_float(div_opc, vA, v1, v2);
   1588     rPC += 2;
   1589     return retval;
   1590 }
    1591 //! lower bytecode DIV_FLOAT_2ADDR by calling common_alu_float
   1592 
   1593 //!
   1594 int op_div_float_2addr() {
   1595     u2 vA = INST_A(inst);
   1596     u2 v1 = vA;
   1597     u2 v2 = INST_B(inst);
   1598     int retval = common_alu_float(div_opc, vA, v1, v2);
   1599     rPC += 1;
   1600     return retval;
   1601 }
    1602 //! common code to handle ALU operations on doubles
    1603 
    1604 //! It uses XMM registers
    1605 int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //add, sub, mul, div
   1606     get_VR_sd(v1, 1, false);
   1607 #ifdef USE_MEM_OPERAND
   1608     alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/);
   1609 #else
   1610     get_VR_sd(v2, 2, false);
   1611     alu_sd_binary_reg_reg(opc, 2, false, 1, false);
   1612 #endif
   1613     set_VR_sd(vA, 1, false);
   1614     return 0;
   1615 }
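/* Same shape as common_alu_float, but on scalar doubles: the ss accessors
 * and opcodes (movss, addss, ...) become their sd counterparts (movsd,
 * addsd, ...), and each VR is read and written as a 64-bit pair. */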
   1616 //! lower bytecode ADD_DOUBLE by calling common_alu_double
   1617 
   1618 //!
   1619 int op_add_double() {
   1620     u2 vA = INST_AA(inst);
   1621     u2 v1 = *((u1*)rPC + 2);
   1622     u2 v2 = *((u1*)rPC + 3);
   1623     int retval = common_alu_double(add_opc, vA, v1, v2);
   1624     rPC += 2;
   1625     return retval;
   1626 }
   1627 //! lower bytecode SUB_DOUBLE by calling common_alu_double
   1628 
   1629 //!
   1630 int op_sub_double() {
   1631     u2 vA = INST_AA(inst);
   1632     u2 v1 = *((u1*)rPC + 2);
   1633     u2 v2 = *((u1*)rPC + 3);
   1634     int retval = common_alu_double(sub_opc, vA, v1, v2);
   1635     rPC += 2;
   1636     return retval;
   1637 }
   1638 //! lower bytecode MUL_DOUBLE by calling common_alu_double
   1639 
   1640 //!
   1641 int op_mul_double() {
   1642     u2 vA = INST_AA(inst);
   1643     u2 v1 = *((u1*)rPC + 2);
   1644     u2 v2 = *((u1*)rPC + 3);
   1645     int retval = common_alu_double(mul_opc, vA, v1, v2);
   1646     rPC += 2;
   1647     return retval;
   1648 }
   1649 //! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double
   1650 
   1651 //!
   1652 int op_add_double_2addr() {
   1653     u2 vA = INST_A(inst);
   1654     u2 v1 = vA;
   1655     u2 v2 = INST_B(inst);
   1656     int retval = common_alu_double(add_opc, vA, v1, v2);
   1657     rPC += 1;
   1658     return retval;
   1659 }
   1660 //! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double
   1661 
   1662 //!
   1663 int op_sub_double_2addr() {
   1664     u2 vA = INST_A(inst);
   1665     u2 v1 = vA;
   1666     u2 v2 = INST_B(inst);
   1667     int retval = common_alu_double(sub_opc, vA, v1, v2);
   1668     rPC += 1;
   1669     return retval;
   1670 }
   1671 //! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double
   1672 
   1673 //!
   1674 int op_mul_double_2addr() {
   1675     u2 vA = INST_A(inst);
   1676     u2 v1 = vA;
   1677     u2 v2 = INST_B(inst);
   1678     int retval = common_alu_double(mul_opc, vA, v1, v2);
   1679     rPC += 1;
   1680     return retval;
   1681 }
    1682 //! common code to handle DIV of doubles
    1683 
    1684 //! It uses the x87 FP stack (note: the DIV_DOUBLE handlers below actually call common_alu_double)
   1685 int common_div_double(u2 vA, u2 v1, u2 v2) {
   1686     load_fp_stack_VR(OpndSize_64, v1); //fldl
   1687     fpu_VR(div_opc, OpndSize_64, v2); //fdivl
   1688     store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
   1689     return 0;
   1690 }
    1691 //! lower bytecode DIV_DOUBLE by calling common_alu_double
   1692 
   1693 //!
   1694 int op_div_double() {
   1695     u2 vA = INST_AA(inst);
   1696     u2 v1 = *((u1*)rPC + 2);
   1697     u2 v2 = *((u1*)rPC + 3);
   1698     int retval = common_alu_double(div_opc, vA, v1, v2);
   1699     rPC += 2;
   1700     return retval;
   1701 }
    1702 //! lower bytecode DIV_DOUBLE_2ADDR by calling common_alu_double
   1703 
   1704 //!
   1705 int op_div_double_2addr() {
   1706     u2 vA = INST_A(inst);
   1707     u2 v1 = vA;
   1708     u2 v2 = INST_B(inst);
   1709     int retval = common_alu_double(div_opc, vA, v1, v2);
   1710     rPC += 1;
   1711     return retval;
   1712 }
   1713 #define P_GPR_1 PhysicalReg_EBX
   1714 #define P_GPR_2 PhysicalReg_ECX
    1715 //! common code to handle REM of floats
    1716 
    1717 //! It uses GPRs and calls fmodf via call_fmodf
   1718 int common_rem_float(u2 vA, u2 v1, u2 v2) {
   1719     get_virtual_reg(v1, OpndSize_32, 1, false);
   1720     get_virtual_reg(v2, OpndSize_32, 2, false);
   1721     load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
   1722     move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true);
   1723     move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true);
   1724     scratchRegs[0] = PhysicalReg_SCRATCH_1;
    1725     call_fmodf(); //float fmodf(float x, float y)
   1726     load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
   1727     store_fp_stack_VR(true, OpndSize_32, vA); //fstps
   1728     return 0;
   1729 }
   1730 #undef P_GPR_1
   1731 #undef P_GPR_2
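/* The REM handlers defer to libc. Both operands are stored into a freshly
 * reserved 8-byte argument area at ESP+0 and ESP+4, then fmodf is called
 * through the call_fmodf stub. Semantically (standard C, for reference):
 *
 *     float r = fmodf(x, y);   // e.g. fmodf(5.5f, 2.0f) == 1.5f
 *
 * Under the 32-bit x86 calling convention the float result comes back in
 * st0, which is why it is stored with fstps rather than from an XMM reg. */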
   1732 //! lower bytecode REM_FLOAT by calling common_rem_float
   1733 
   1734 //!
   1735 int op_rem_float() {
   1736     u2 vA = INST_AA(inst);
   1737     u2 v1 = *((u1*)rPC + 2);
   1738     u2 v2 = *((u1*)rPC + 3);
   1739     int retval = common_rem_float(vA, v1, v2);
   1740     rPC += 2;
   1741     return retval;
   1742 }
   1743 //! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float
   1744 
   1745 //!
   1746 int op_rem_float_2addr() {
   1747     u2 vA = INST_A(inst);
   1748     u2 v1 = vA;
   1749     u2 v2 = INST_B(inst);
   1750     int retval = common_rem_float(vA, v1, v2);
   1751     rPC += 1;
   1752     return retval;
   1753 }
    1754 //! common code to handle REM of doubles
    1755 
    1756 //! It uses XMM registers and calls fmod via call_fmod
   1757 int common_rem_double(u2 vA, u2 v1, u2 v2) {
   1758     get_virtual_reg(v1, OpndSize_64, 1, false);
   1759     get_virtual_reg(v2, OpndSize_64, 2, false);
   1760     load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
   1761     move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true);
   1762     move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true);
   1763     scratchRegs[0] = PhysicalReg_SCRATCH_1;
    1764     call_fmod(); //double fmod(double x, double y)
   1765     load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true);
   1766     store_fp_stack_VR(true, OpndSize_64, vA); //fstpl
   1767     return 0;
   1768 }
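/* Same pattern as common_rem_float, widened to doubles: 16 bytes are
 * reserved so the two 8-byte operands land at ESP+0 and ESP+8, and fmod's
 * double result is popped from st0 into the vA/vA+1 pair with fstpl. */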
   1769 //! lower bytecode REM_DOUBLE by calling common_rem_double
   1770 
   1771 //!
   1772 int op_rem_double() {
   1773     u2 vA = INST_AA(inst);
   1774     u2 v1 = *((u1*)rPC + 2);
   1775     u2 v2 = *((u1*)rPC + 3);
   1776     int retval = common_rem_double(vA, v1, v2);
   1777     rPC += 2;
   1778     return retval;
   1779 }
   1780 //! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double
   1781 
   1782 //!
   1783 int op_rem_double_2addr() {
   1784     u2 vA = INST_A(inst);
   1785     u2 v1 = vA;
   1786     u2 v2 = INST_B(inst);
   1787     int retval = common_rem_double(vA, v1, v2);
   1788     rPC += 1;
   1789     return retval;
   1790 }
   1791 //! lower bytecode CMPL_FLOAT
   1792 
   1793 //!
   1794 int op_cmpl_float() {
   1795     u2 vA = INST_AA(inst);
   1796     u4 v1 = FETCH(1) & 0xff;
   1797     u4 v2 = FETCH(1) >> 8;
   1798     get_VR_ss(v1, 1, false); //xmm
   1799     move_imm_to_reg(OpndSize_32, 0, 1, false);
   1800     move_imm_to_reg(OpndSize_32, 1, 2, false);
   1801     move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
   1802     compare_VR_ss_reg(v2, 1, false);
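    //comiss/ucomiss flag mapping: greater ZF=0,PF=0,CF=0; less CF=1;
    //equal ZF=1; unordered ZF=1,PF=1,CF=1. Because unordered also sets
    //ZF, the Z cmov below must be overridden by the later P cmov.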
    1803     //default result: 0xffffffff (-1), covering the less-than case
    1804     move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false);
    1806     //ORDER of the cmovs matters: Z, then P, then A
    1807     //NaN (unordered): 0xffffffff (-1), via the P cmov
   1808     conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
   1809                                              1, false, 4, false);
   1810     conditional_move_reg_to_reg(OpndSize_32, Condition_P,
   1811                                              3, false, 4, false);
   1812     conditional_move_reg_to_reg(OpndSize_32, Condition_A,
   1813                                              2, false, 4, false);
   1814     set_virtual_reg(vA, OpndSize_32, 4, false);
   1815     rPC += 2;
   1816     return 0;
   1817 }
   1818 //! lower bytecode CMPG_FLOAT
   1819 
   1820 //!
   1821 int op_cmpg_float() {
   1822     u2 vA = INST_AA(inst);
   1823     u4 v1 = FETCH(1) & 0xff;
   1824     u4 v2 = FETCH(1) >> 8;
   1825     get_VR_ss(v1, 1, false);
   1826     compare_VR_ss_reg(v2, 1, false);
   1827     move_imm_to_reg(OpndSize_32, 0, 1, false);
   1828     move_imm_to_reg(OpndSize_32, 1, 2, false);
    1829     //default result: 0xffffffff (-1), covering the less-than case
   1830     move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
   1831     conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
   1832                                 1, false, 3, false);
    1833     //NaN (unordered): 1, via the P cmov (greater bias)
   1834     conditional_move_reg_to_reg(OpndSize_32, Condition_P,
   1835                                 2, false, 3, false);
   1836     conditional_move_reg_to_reg(OpndSize_32, Condition_A,
   1837                                 2, false, 3, false);
   1838     set_virtual_reg(vA, OpndSize_32, 3, false);
   1839     rPC += 2;
   1840     return 0;
   1841 }
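/* For reference, the Dalvik semantics the four float/double compares
 * implement (a minimal C sketch; the function names are illustrative):
 *
 *     int cmpl(float x, float y) {   // CMPL_*: NaN biased toward -1
 *         if (x > y)  return 1;
 *         if (x == y) return 0;
 *         return -1;                 // x < y, or unordered
 *     }
 *     int cmpg(float x, float y) {   // CMPG_*: NaN biased toward +1
 *         if (x < y)  return -1;
 *         if (x == y) return 0;
 *         return 1;                  // x > y, or unordered
 *     }
 *
 * The double variants below differ only in operand size. */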
   1842 //! lower bytecode CMPL_DOUBLE
   1843 
   1844 //!
   1845 int op_cmpl_double() {
   1846     u2 vA = INST_AA(inst);
   1847     u4 v1 = FETCH(1) & 0xff;
   1848     u4 v2 = FETCH(1) >> 8;
   1849     get_VR_sd(v1, 1, false);
   1850     compare_VR_sd_reg(v2, 1, false);
   1851     move_imm_to_reg(OpndSize_32, 0, 1, false);
   1852     move_imm_to_reg(OpndSize_32, 1, 2, false);
   1853     move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
   1854 
    1855     //default result: 0xffffffff (-1), covering the less-than case
   1856     move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false);
   1857     conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
   1858                                              1, false, 4, false);
   1859     conditional_move_reg_to_reg(OpndSize_32, Condition_P,
   1860                                              3, false, 4, false);
   1861     conditional_move_reg_to_reg(OpndSize_32, Condition_A,
   1862                                              2, false, 4, false);
   1863     set_virtual_reg(vA, OpndSize_32, 4, false);
   1864     rPC += 2;
   1865     return 0;
   1866 }
   1867 //! lower bytecode CMPG_DOUBLE
   1868 
   1869 //!
   1870 int op_cmpg_double() {
   1871     u2 vA = INST_AA(inst);
   1872     u4 v1 = FETCH(1) & 0xff;
   1873     u4 v2 = FETCH(1) >> 8;
   1874     get_VR_sd(v1, 1, false);
   1875     compare_VR_sd_reg(v2, 1, false);
   1876     move_imm_to_reg(OpndSize_32, 0, 1, false);
   1877     move_imm_to_reg(OpndSize_32, 1, 2, false);
   1878 
    1879     //default result: 0xffffffff (-1), covering the less-than case
    1880     move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
   1882     conditional_move_reg_to_reg(OpndSize_32, Condition_Z,
   1883                                              1, false, 3, false);
    1884     //NaN (unordered): 1, via the P cmov (greater bias)
   1885     conditional_move_reg_to_reg(OpndSize_32, Condition_P,
   1886                                              2, false, 3, false);
   1887     conditional_move_reg_to_reg(OpndSize_32, Condition_A,
   1888                                              2, false, 3, false);
    1889     set_virtual_reg(vA, OpndSize_32, 3, false);
   1890     rPC += 2;
   1891     return 0;
   1892 }
   1893 #define P_GPR_1 PhysicalReg_EBX
   1894 #define P_GPR_2 PhysicalReg_ECX
   1895 #define P_GPR_3 PhysicalReg_ESI
   1896 #define P_SCRATCH_1 PhysicalReg_EDX
   1897 #define P_SCRATCH_2 PhysicalReg_EAX
    1898 #define OPTION_OLD //cmov-based path with a simpler control-flow graph
   1899 //! lower bytecode CMP_LONG
   1900 
   1901 //!
   1902 int op_cmp_long() {
   1903     u2 vA = INST_AA(inst);
   1904     u4 v1 = FETCH(1) & 0xff;
   1905     u4 v2 = FETCH(1) >> 8;
   1906     get_virtual_reg(v1+1, OpndSize_32, 2, false);
   1907 #ifdef OPTION_OLD
   1908     move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false);
   1909     move_imm_to_reg(OpndSize_32, 1, 4, false);
   1910     move_imm_to_reg(OpndSize_32, 0, 5, false);
   1911 #endif
    1912     compare_VR_reg(OpndSize_32, v2+1, 2, false);
   1914 #ifndef OPTION_OLD
   1915     conditional_jump(Condition_L, ".cmp_long_less", true);
   1916     conditional_jump(Condition_G, ".cmp_long_greater", true);
   1917 #else
   1918     conditional_jump(Condition_E, ".cmp_long_equal", true);
   1919     rememberState(1);
    1920     conditional_move_reg_to_reg(OpndSize_32, Condition_L, //high words: signed less
    1921                                              3, false, 6, false);
    1922     conditional_move_reg_to_reg(OpndSize_32, Condition_G, //high words: signed greater
    1923                                              4, false, 6, false);
   1924     set_virtual_reg(vA, OpndSize_32, 6, false);
   1925     rememberState(2);
   1926     unconditional_jump(".cmp_long_okay", true);
   1927     insertLabel(".cmp_long_equal", true);
   1928     goToState(1);
   1929 #endif
   1930 
   1931     get_virtual_reg(v1, OpndSize_32, 1, false);
    1932     compare_VR_reg(OpndSize_32, v2, 1, false);
   1934 #ifdef OPTION_OLD
   1935     conditional_move_reg_to_reg(OpndSize_32, Condition_E,
   1936                                              5, false, 6, false);
    1937     conditional_move_reg_to_reg(OpndSize_32, Condition_B, //low words: unsigned below
    1938                                              3, false, 6, false);
    1939     conditional_move_reg_to_reg(OpndSize_32, Condition_A, //low words: unsigned above
    1940                                              4, false, 6, false);
   1941     set_virtual_reg(vA, OpndSize_32, 6, false);
   1942     transferToState(2);
   1943 #else
   1944     conditional_jump(Condition_A, ".cmp_long_greater", true);
   1945     conditional_jump(Condition_NE, ".cmp_long_less", true);
   1946     set_VR_to_imm(vA, OpndSize_32, 0);
   1947     unconditional_jump(".cmp_long_okay", true);
   1948 
   1949     insertLabel(".cmp_long_less", true);
   1950     set_VR_to_imm(vA, OpndSize_32, 0xffffffff);
   1951     unconditional_jump(".cmp_long_okay", true);
   1952 
   1953     insertLabel(".cmp_long_greater", true);
   1954     set_VR_to_imm(vA, OpndSize_32, 1);
   1955 #endif
   1956     insertLabel(".cmp_long_okay", true);
   1957     rPC += 2;
   1958     return 0;
   1959 }
   1960 #undef P_GPR_1
   1961 #undef P_GPR_2
   1962 #undef P_GPR_3
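/* A minimal C sketch of the value CMP_LONG produces (illustrative only;
 * int32_t/uint32_t stand in for the VR word types):
 *
 *     int cmp_long(int32_t xHi, uint32_t xLo, int32_t yHi, uint32_t yLo) {
 *         if (xHi != yHi) return xHi < yHi ? -1 : 1;  // signed high words
 *         if (xLo != yLo) return xLo < yLo ? -1 : 1;  // unsigned low words
 *         return 0;
 *     }
 *
 * matching the signed (L/G) conditions on the high-word compare and the
 * unsigned (B/A) conditions on the low-word compare above. */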
   1963