/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
     16 
     17 #pragma once
     18 
     19 #include "dex_format.h"
     20 
     21 #include <stddef.h>
     22 
     23 // .dex bytecode definitions and helpers:
     24 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html
     25 
     26 namespace dex {
     27 
     28 // The number of Dalvik opcodes
     29 constexpr size_t kNumPackedOpcodes = 0x100;
     30 
     31 // Switch table and array data signatures are a code unit consisting
     32 // of "NOP" (0x00) in the low-order byte and a non-zero identifying
     33 // code in the high-order byte. (A true NOP is 0x0000.)
     34 constexpr u2 kPackedSwitchSignature = 0x0100;
     35 constexpr u2 kSparseSwitchSignature = 0x0200;
     36 constexpr u2 kArrayDataSignature = 0x0300;
     37 
     38 // Enumeration of all Dalvik opcodes
     39 enum Opcode : u1 {
     40   OP_NOP = 0x00,
     41   OP_MOVE = 0x01,
     42   OP_MOVE_FROM16 = 0x02,
     43   OP_MOVE_16 = 0x03,
     44   OP_MOVE_WIDE = 0x04,
     45   OP_MOVE_WIDE_FROM16 = 0x05,
     46   OP_MOVE_WIDE_16 = 0x06,
     47   OP_MOVE_OBJECT = 0x07,
     48   OP_MOVE_OBJECT_FROM16 = 0x08,
     49   OP_MOVE_OBJECT_16 = 0x09,
     50   OP_MOVE_RESULT = 0x0a,
     51   OP_MOVE_RESULT_WIDE = 0x0b,
     52   OP_MOVE_RESULT_OBJECT = 0x0c,
     53   OP_MOVE_EXCEPTION = 0x0d,
     54   OP_RETURN_VOID = 0x0e,
     55   OP_RETURN = 0x0f,
     56   OP_RETURN_WIDE = 0x10,
     57   OP_RETURN_OBJECT = 0x11,
     58   OP_CONST_4 = 0x12,
     59   OP_CONST_16 = 0x13,
     60   OP_CONST = 0x14,
     61   OP_CONST_HIGH16 = 0x15,
     62   OP_CONST_WIDE_16 = 0x16,
     63   OP_CONST_WIDE_32 = 0x17,
     64   OP_CONST_WIDE = 0x18,
     65   OP_CONST_WIDE_HIGH16 = 0x19,
     66   OP_CONST_STRING = 0x1a,
     67   OP_CONST_STRING_JUMBO = 0x1b,
     68   OP_CONST_CLASS = 0x1c,
     69   OP_MONITOR_ENTER = 0x1d,
     70   OP_MONITOR_EXIT = 0x1e,
     71   OP_CHECK_CAST = 0x1f,
     72   OP_INSTANCE_OF = 0x20,
     73   OP_ARRAY_LENGTH = 0x21,
     74   OP_NEW_INSTANCE = 0x22,
     75   OP_NEW_ARRAY = 0x23,
     76   OP_FILLED_NEW_ARRAY = 0x24,
     77   OP_FILLED_NEW_ARRAY_RANGE = 0x25,
     78   OP_FILL_ARRAY_DATA = 0x26,
     79   OP_THROW = 0x27,
     80   OP_GOTO = 0x28,
     81   OP_GOTO_16 = 0x29,
     82   OP_GOTO_32 = 0x2a,
     83   OP_PACKED_SWITCH = 0x2b,
     84   OP_SPARSE_SWITCH = 0x2c,
     85   OP_CMPL_FLOAT = 0x2d,
     86   OP_CMPG_FLOAT = 0x2e,
     87   OP_CMPL_DOUBLE = 0x2f,
     88   OP_CMPG_DOUBLE = 0x30,
     89   OP_CMP_LONG = 0x31,
     90   OP_IF_EQ = 0x32,
     91   OP_IF_NE = 0x33,
     92   OP_IF_LT = 0x34,
     93   OP_IF_GE = 0x35,
     94   OP_IF_GT = 0x36,
     95   OP_IF_LE = 0x37,
     96   OP_IF_EQZ = 0x38,
     97   OP_IF_NEZ = 0x39,
     98   OP_IF_LTZ = 0x3a,
     99   OP_IF_GEZ = 0x3b,
    100   OP_IF_GTZ = 0x3c,
    101   OP_IF_LEZ = 0x3d,
    102   OP_UNUSED_3E = 0x3e,
    103   OP_UNUSED_3F = 0x3f,
    104   OP_UNUSED_40 = 0x40,
    105   OP_UNUSED_41 = 0x41,
    106   OP_UNUSED_42 = 0x42,
    107   OP_UNUSED_43 = 0x43,
    108   OP_AGET = 0x44,
    109   OP_AGET_WIDE = 0x45,
    110   OP_AGET_OBJECT = 0x46,
    111   OP_AGET_BOOLEAN = 0x47,
    112   OP_AGET_BYTE = 0x48,
    113   OP_AGET_CHAR = 0x49,
    114   OP_AGET_SHORT = 0x4a,
    115   OP_APUT = 0x4b,
    116   OP_APUT_WIDE = 0x4c,
    117   OP_APUT_OBJECT = 0x4d,
    118   OP_APUT_BOOLEAN = 0x4e,
    119   OP_APUT_BYTE = 0x4f,
    120   OP_APUT_CHAR = 0x50,
    121   OP_APUT_SHORT = 0x51,
    122   OP_IGET = 0x52,
    123   OP_IGET_WIDE = 0x53,
    124   OP_IGET_OBJECT = 0x54,
    125   OP_IGET_BOOLEAN = 0x55,
    126   OP_IGET_BYTE = 0x56,
    127   OP_IGET_CHAR = 0x57,
    128   OP_IGET_SHORT = 0x58,
    129   OP_IPUT = 0x59,
    130   OP_IPUT_WIDE = 0x5a,
    131   OP_IPUT_OBJECT = 0x5b,
    132   OP_IPUT_BOOLEAN = 0x5c,
    133   OP_IPUT_BYTE = 0x5d,
    134   OP_IPUT_CHAR = 0x5e,
    135   OP_IPUT_SHORT = 0x5f,
    136   OP_SGET = 0x60,
    137   OP_SGET_WIDE = 0x61,
    138   OP_SGET_OBJECT = 0x62,
    139   OP_SGET_BOOLEAN = 0x63,
    140   OP_SGET_BYTE = 0x64,
    141   OP_SGET_CHAR = 0x65,
    142   OP_SGET_SHORT = 0x66,
    143   OP_SPUT = 0x67,
    144   OP_SPUT_WIDE = 0x68,
    145   OP_SPUT_OBJECT = 0x69,
    146   OP_SPUT_BOOLEAN = 0x6a,
    147   OP_SPUT_BYTE = 0x6b,
    148   OP_SPUT_CHAR = 0x6c,
    149   OP_SPUT_SHORT = 0x6d,
    150   OP_INVOKE_VIRTUAL = 0x6e,
    151   OP_INVOKE_SUPER = 0x6f,
    152   OP_INVOKE_DIRECT = 0x70,
    153   OP_INVOKE_STATIC = 0x71,
    154   OP_INVOKE_INTERFACE = 0x72,
    155   OP_UNUSED_73 = 0x73,
    156   OP_INVOKE_VIRTUAL_RANGE = 0x74,
    157   OP_INVOKE_SUPER_RANGE = 0x75,
    158   OP_INVOKE_DIRECT_RANGE = 0x76,
    159   OP_INVOKE_STATIC_RANGE = 0x77,
    160   OP_INVOKE_INTERFACE_RANGE = 0x78,
    161   OP_UNUSED_79 = 0x79,
    162   OP_UNUSED_7A = 0x7a,
    163   OP_NEG_INT = 0x7b,
    164   OP_NOT_INT = 0x7c,
    165   OP_NEG_LONG = 0x7d,
    166   OP_NOT_LONG = 0x7e,
    167   OP_NEG_FLOAT = 0x7f,
    168   OP_NEG_DOUBLE = 0x80,
    169   OP_INT_TO_LONG = 0x81,
    170   OP_INT_TO_FLOAT = 0x82,
    171   OP_INT_TO_DOUBLE = 0x83,
    172   OP_LONG_TO_INT = 0x84,
    173   OP_LONG_TO_FLOAT = 0x85,
    174   OP_LONG_TO_DOUBLE = 0x86,
    175   OP_FLOAT_TO_INT = 0x87,
    176   OP_FLOAT_TO_LONG = 0x88,
    177   OP_FLOAT_TO_DOUBLE = 0x89,
    178   OP_DOUBLE_TO_INT = 0x8a,
    179   OP_DOUBLE_TO_LONG = 0x8b,
    180   OP_DOUBLE_TO_FLOAT = 0x8c,
    181   OP_INT_TO_BYTE = 0x8d,
    182   OP_INT_TO_CHAR = 0x8e,
    183   OP_INT_TO_SHORT = 0x8f,
    184   OP_ADD_INT = 0x90,
    185   OP_SUB_INT = 0x91,
    186   OP_MUL_INT = 0x92,
    187   OP_DIV_INT = 0x93,
    188   OP_REM_INT = 0x94,
    189   OP_AND_INT = 0x95,
    190   OP_OR_INT = 0x96,
    191   OP_XOR_INT = 0x97,
    192   OP_SHL_INT = 0x98,
    193   OP_SHR_INT = 0x99,
    194   OP_USHR_INT = 0x9a,
    195   OP_ADD_LONG = 0x9b,
    196   OP_SUB_LONG = 0x9c,
    197   OP_MUL_LONG = 0x9d,
    198   OP_DIV_LONG = 0x9e,
    199   OP_REM_LONG = 0x9f,
    200   OP_AND_LONG = 0xa0,
    201   OP_OR_LONG = 0xa1,
    202   OP_XOR_LONG = 0xa2,
    203   OP_SHL_LONG = 0xa3,
    204   OP_SHR_LONG = 0xa4,
    205   OP_USHR_LONG = 0xa5,
    206   OP_ADD_FLOAT = 0xa6,
    207   OP_SUB_FLOAT = 0xa7,
    208   OP_MUL_FLOAT = 0xa8,
    209   OP_DIV_FLOAT = 0xa9,
    210   OP_REM_FLOAT = 0xaa,
    211   OP_ADD_DOUBLE = 0xab,
    212   OP_SUB_DOUBLE = 0xac,
    213   OP_MUL_DOUBLE = 0xad,
    214   OP_DIV_DOUBLE = 0xae,
    215   OP_REM_DOUBLE = 0xaf,
    216   OP_ADD_INT_2ADDR = 0xb0,
    217   OP_SUB_INT_2ADDR = 0xb1,
    218   OP_MUL_INT_2ADDR = 0xb2,
    219   OP_DIV_INT_2ADDR = 0xb3,
    220   OP_REM_INT_2ADDR = 0xb4,
    221   OP_AND_INT_2ADDR = 0xb5,
    222   OP_OR_INT_2ADDR = 0xb6,
    223   OP_XOR_INT_2ADDR = 0xb7,
    224   OP_SHL_INT_2ADDR = 0xb8,
    225   OP_SHR_INT_2ADDR = 0xb9,
    226   OP_USHR_INT_2ADDR = 0xba,
    227   OP_ADD_LONG_2ADDR = 0xbb,
    228   OP_SUB_LONG_2ADDR = 0xbc,
    229   OP_MUL_LONG_2ADDR = 0xbd,
    230   OP_DIV_LONG_2ADDR = 0xbe,
    231   OP_REM_LONG_2ADDR = 0xbf,
    232   OP_AND_LONG_2ADDR = 0xc0,
    233   OP_OR_LONG_2ADDR = 0xc1,
    234   OP_XOR_LONG_2ADDR = 0xc2,
    235   OP_SHL_LONG_2ADDR = 0xc3,
    236   OP_SHR_LONG_2ADDR = 0xc4,
    237   OP_USHR_LONG_2ADDR = 0xc5,
    238   OP_ADD_FLOAT_2ADDR = 0xc6,
    239   OP_SUB_FLOAT_2ADDR = 0xc7,
    240   OP_MUL_FLOAT_2ADDR = 0xc8,
    241   OP_DIV_FLOAT_2ADDR = 0xc9,
    242   OP_REM_FLOAT_2ADDR = 0xca,
    243   OP_ADD_DOUBLE_2ADDR = 0xcb,
    244   OP_SUB_DOUBLE_2ADDR = 0xcc,
    245   OP_MUL_DOUBLE_2ADDR = 0xcd,
    246   OP_DIV_DOUBLE_2ADDR = 0xce,
    247   OP_REM_DOUBLE_2ADDR = 0xcf,
    248   OP_ADD_INT_LIT16 = 0xd0,
    249   OP_RSUB_INT = 0xd1,
    250   OP_MUL_INT_LIT16 = 0xd2,
    251   OP_DIV_INT_LIT16 = 0xd3,
    252   OP_REM_INT_LIT16 = 0xd4,
    253   OP_AND_INT_LIT16 = 0xd5,
    254   OP_OR_INT_LIT16 = 0xd6,
    255   OP_XOR_INT_LIT16 = 0xd7,
    256   OP_ADD_INT_LIT8 = 0xd8,
    257   OP_RSUB_INT_LIT8 = 0xd9,
    258   OP_MUL_INT_LIT8 = 0xda,
    259   OP_DIV_INT_LIT8 = 0xdb,
    260   OP_REM_INT_LIT8 = 0xdc,
    261   OP_AND_INT_LIT8 = 0xdd,
    262   OP_OR_INT_LIT8 = 0xde,
    263   OP_XOR_INT_LIT8 = 0xdf,
    264   OP_SHL_INT_LIT8 = 0xe0,
    265   OP_SHR_INT_LIT8 = 0xe1,
    266   OP_USHR_INT_LIT8 = 0xe2,
    267   OP_IGET_VOLATILE = 0xe3,
    268   OP_IPUT_VOLATILE = 0xe4,
    269   OP_SGET_VOLATILE = 0xe5,
    270   OP_SPUT_VOLATILE = 0xe6,
    271   OP_IGET_OBJECT_VOLATILE = 0xe7,
    272   OP_IGET_WIDE_VOLATILE = 0xe8,
    273   OP_IPUT_WIDE_VOLATILE = 0xe9,
    274   OP_SGET_WIDE_VOLATILE = 0xea,
    275   OP_SPUT_WIDE_VOLATILE = 0xeb,
    276   OP_BREAKPOINT = 0xec,
    277   OP_THROW_VERIFICATION_ERROR = 0xed,
    278   OP_EXECUTE_INLINE = 0xee,
    279   OP_EXECUTE_INLINE_RANGE = 0xef,
    280   OP_INVOKE_OBJECT_INIT_RANGE = 0xf0,
    281   OP_RETURN_VOID_BARRIER = 0xf1,
    282   OP_IGET_QUICK = 0xf2,
    283   OP_IGET_WIDE_QUICK = 0xf3,
    284   OP_IGET_OBJECT_QUICK = 0xf4,
    285   OP_IPUT_QUICK = 0xf5,
    286   OP_IPUT_WIDE_QUICK = 0xf6,
    287   OP_IPUT_OBJECT_QUICK = 0xf7,
    288   OP_INVOKE_VIRTUAL_QUICK = 0xf8,
    289   OP_INVOKE_VIRTUAL_QUICK_RANGE = 0xf9,
    290   OP_INVOKE_SUPER_QUICK = 0xfa,
    291   OP_INVOKE_SUPER_QUICK_RANGE = 0xfb,
    292   OP_IPUT_OBJECT_VOLATILE = 0xfc,
    293   OP_SGET_OBJECT_VOLATILE = 0xfd,
    294   OP_SPUT_OBJECT_VOLATILE = 0xfe,
    295   OP_UNUSED_FF = 0xff,
    296 };
    297 
    298 // Instruction formats associated with Dalvik opcodes
    299 enum InstructionFormat : u1 {
    300   kFmt00x = 0,  // unknown format (also used for "breakpoint" opcode)
    301   kFmt10x,      // op
    302   kFmt12x,      // op vA, vB
    303   kFmt11n,      // op vA, #+B
    304   kFmt11x,      // op vAA
    305   kFmt10t,      // op +AA
    306   kFmt20bc,     // [opt] op AA, thing@BBBB
    307   kFmt20t,      // op +AAAA
    308   kFmt22x,      // op vAA, vBBBB
    309   kFmt21t,      // op vAA, +BBBB
    310   kFmt21s,      // op vAA, #+BBBB
    311   kFmt21h,      // op vAA, #+BBBB00000[00000000]
    312   kFmt21c,      // op vAA, thing@BBBB
    313   kFmt23x,      // op vAA, vBB, vCC
    314   kFmt22b,      // op vAA, vBB, #+CC
    315   kFmt22t,      // op vA, vB, +CCCC
    316   kFmt22s,      // op vA, vB, #+CCCC
    317   kFmt22c,      // op vA, vB, thing@CCCC
    318   kFmt22cs,     // [opt] op vA, vB, field offset CCCC
    319   kFmt30t,      // op +AAAAAAAA
    320   kFmt32x,      // op vAAAA, vBBBB
    321   kFmt31i,      // op vAA, #+BBBBBBBB
    322   kFmt31t,      // op vAA, +BBBBBBBB
    323   kFmt31c,      // op vAA, string@BBBBBBBB
    324   kFmt35c,      // op {vC,vD,vE,vF,vG}, thing@BBBB
    325   kFmt35ms,     // [opt] invoke-virtual+super
    326   kFmt3rc,      // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
    327   kFmt3rms,     // [opt] invoke-virtual+super/range
    328   kFmt51l,      // op vAA, #+BBBBBBBBBBBBBBBB
    329   kFmt35mi,     // [opt] inline invoke
    330   kFmt3rmi,     // [opt] inline invoke/range
    331 };
    332 
    333 using OpcodeFlags = u4;
    334 
    335 enum : OpcodeFlags {
    336   kInstrCanBranch     = 1 << 0,   // conditional or unconditional branch
    337   kInstrCanContinue   = 1 << 1,   // flow can continue to next statement
    338   kInstrCanSwitch     = 1 << 2,   // switch statement
    339   kInstrCanThrow      = 1 << 3,   // could cause an exception to be thrown
    340   kInstrCanReturn     = 1 << 4,   // returns, no additional statements
    341   kInstrInvoke        = 1 << 5,   // a flavor of invoke
    342   kInstrWideRegA      = 1 << 6,   // wide (64bit) vA
    343   kInstrWideRegB      = 1 << 7,   // wide (64bit) vB
    344   kInstrWideRegC      = 1 << 8,   // wide (64bit) vC
    345 };
    346 
    347 // Types of indexed reference that are associated with opcodes whose
    348 // formats include such an indexed reference (e.g., 21c and 35c).
    349 enum InstructionIndexType : u1 {
    350   kIndexUnknown = 0,
    351   kIndexNone,          // has no index
    352   kIndexVaries,        // "It depends." Used for throw-verification-error
    353   kIndexTypeRef,       // type reference index
    354   kIndexStringRef,     // string reference index
    355   kIndexMethodRef,     // method reference index
    356   kIndexFieldRef,      // field reference index
    357   kIndexInlineMethod,  // inline method index (for inline linked methods)
    358   kIndexVtableOffset,  // vtable offset (for static linked methods)
    359   kIndexFieldOffset    // field offset (for static linked fields)
    360 };
    361 
    362 // Holds the contents of a decoded instruction.
    363 struct Instruction {
    364   u4 vA;                // the A field of the instruction
    365   u4 vB;                // the B field of the instruction
    366   u8 vB_wide;           // 64bit version of the B field (for kFmt51l)
    367   u4 vC;                // the C field of the instruction
    368   u4 arg[5];            // vC/D/E/F/G in invoke or filled-new-array
    369   Opcode opcode;        // instruction opcode
    370 };
    371 
    372 // "packed-switch-payload" format
    373 struct PackedSwitchPayload {
    374   u2 ident;
    375   u2 size;
    376   s4 first_key;
    377   s4 targets[];
    378 };
    379 
    380 // "sparse-switch-payload" format
    381 struct SparseSwitchPayload {
    382   u2 ident;
    383   u2 size;
    384   s4 data[];
    385 };
    386 
    387 // "fill-array-data-payload" format
    388 struct ArrayData {
    389   u2 ident;
    390   u2 element_width;
    391   u4 size;
    392   u1 data[];
    393 };
    394 
    395 // Extracts the opcode from a Dalvik code unit (bytecode)
    396 Opcode OpcodeFromBytecode(u2 bytecode);
    397 
    398 // Returns the name of an opcode
    399 const char* GetOpcodeName(Opcode opcode);
    400 
    401 // Returns the index type associated with the specified opcode
    402 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode);
    403 
    404 // Returns the format associated with the specified opcode
    405 InstructionFormat GetFormatFromOpcode(Opcode opcode);
    406 
    407 // Returns the flags for the specified opcode
    408 OpcodeFlags GetFlagsFromOpcode(Opcode opcode);
    409 
    410 // Returns the instruction width for the specified opcode
    411 size_t GetWidthFromOpcode(Opcode opcode);
    412 
    413 // Return the width of the specified instruction, or 0 if not defined.  Also
    414 // works for special OP_NOP entries, including switch statement data tables
    415 // and array data.
    416 size_t GetWidthFromBytecode(const u2* bytecode);
    417 
    418 // Decode a .dex bytecode
    419 Instruction DecodeInstruction(const u2* bytecode);
    420 
    421 }  // namespace dex
    422