Home | History | Annotate | Download | only in dexdump
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  * Implementation file for control flow graph dumping for the dexdump utility.
     17  */
     18 
     19 #include "dexdump_cfg.h"
     20 
     21 #include <inttypes.h>
     22 #include <ostream>
     23 #include <map>
     24 #include <set>
     25 
     26 #include "dex_file-inl.h"
     27 #include "dex_instruction-inl.h"
     28 
     29 namespace art {
     30 
     31 static void dumpMethodCFGImpl(const DexFile* dex_file,
     32                               uint32_t dex_method_idx,
     33                               const DexFile::CodeItem* code_item,
     34                               std::ostream& os) {
     35   os << "digraph {\n";
     36   os << "  # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n";
     37 
     38   std::set<uint32_t> dex_pc_is_branch_target;
     39   {
     40     // Go and populate.
     41     const Instruction* inst = Instruction::At(code_item->insns_);
     42     for (uint32_t dex_pc = 0;
     43          dex_pc < code_item->insns_size_in_code_units_;
     44          dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
     45       if (inst->IsBranch()) {
     46         dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
     47       } else if (inst->IsSwitch()) {
     48         const uint16_t* insns = code_item->insns_ + dex_pc;
     49         int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
     50         const uint16_t* switch_insns = insns + switch_offset;
     51         uint32_t switch_count = switch_insns[1];
     52         int32_t targets_offset;
     53         if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
     54           /* 0=sig, 1=count, 2/3=firstKey */
     55           targets_offset = 4;
     56         } else {
     57           /* 0=sig, 1=count, 2..count*2 = keys */
     58           targets_offset = 2 + 2 * switch_count;
     59         }
     60         for (uint32_t targ = 0; targ < switch_count; targ++) {
     61           int32_t offset =
     62               static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
     63               static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
     64           dex_pc_is_branch_target.insert(dex_pc + offset);
     65         }
     66       }
     67     }
     68   }
     69 
     70   // Create nodes for "basic blocks."
     71   std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
     72   std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.
     73 
     74   {
     75     const Instruction* inst = Instruction::At(code_item->insns_);
     76     bool first_in_block = true;
     77     bool force_new_block = false;
     78     for (uint32_t dex_pc = 0;
     79          dex_pc < code_item->insns_size_in_code_units_;
     80          dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
     81       if (dex_pc == 0 ||
     82           (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
     83           force_new_block) {
     84         uint32_t id = dex_pc_to_node_id.size();
     85         if (id > 0) {
     86           // End last node.
     87           os << "}\"];\n";
     88         }
     89         // Start next node.
     90         os << "  node" << id << " [shape=record,label=\"{";
     91         dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
     92         first_in_block = true;
     93         force_new_block = false;
     94       }
     95 
     96       // Register instruction.
     97       dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
     98 
     99       // Print instruction.
    100       if (!first_in_block) {
    101         os << " | ";
    102       } else {
    103         first_in_block = false;
    104       }
    105 
    106       // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
    107       os << "<" << "p" << dex_pc << ">";
    108       os << " 0x" << std::hex << dex_pc << std::dec << ": ";
    109       std::string inst_str = inst->DumpString(dex_file);
    110       size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
    111                              // we need to escape.
    112       while (cur_start != std::string::npos) {
    113         size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
    114         if (next_escape == std::string::npos) {
    115           os << inst_str.substr(cur_start, inst_str.size() - cur_start);
    116           break;
    117         } else {
    118           os << inst_str.substr(cur_start, next_escape - cur_start);
    119           // Escape all necessary characters.
    120           while (next_escape < inst_str.size()) {
    121             char c = inst_str.at(next_escape);
    122             if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
    123               os << '\\' << c;
    124             } else {
    125               break;
    126             }
    127             next_escape++;
    128           }
    129           if (next_escape >= inst_str.size()) {
    130             next_escape = std::string::npos;
    131           }
    132           cur_start = next_escape;
    133         }
    134       }
    135 
    136       // Force a new block for some fall-throughs and some instructions that terminate the "local"
    137       // control flow.
    138       force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
    139     }
    140     // Close last node.
    141     if (dex_pc_to_node_id.size() > 0) {
    142       os << "}\"];\n";
    143     }
    144   }
    145 
    146   // Create edges between them.
    147   {
    148     std::ostringstream regular_edges;
    149     std::ostringstream taken_edges;
    150     std::ostringstream exception_edges;
    151 
    152     // Common set of exception edges.
    153     std::set<uint32_t> exception_targets;
    154 
    155     // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
    156     // pass. In the first pass we try and see whether we can use a common set of edges.
    157     std::set<uint32_t> blocks_with_detailed_exceptions;
    158 
    159     {
    160       uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
    161       uint32_t old_dex_pc = 0;
    162       uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
    163       const Instruction* inst = Instruction::At(code_item->insns_);
    164       for (uint32_t dex_pc = 0;
    165           dex_pc < code_item->insns_size_in_code_units_;
    166           old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
    167         {
    168           auto it = dex_pc_to_node_id.find(dex_pc);
    169           if (it != dex_pc_to_node_id.end()) {
    170             if (!exception_targets.empty()) {
    171               // It seems the last block had common exception handlers. Add the exception edges now.
    172               uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
    173               for (uint32_t handler_pc : exception_targets) {
    174                 auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
    175                 if (node_id_it != dex_pc_to_incl_id.end()) {
    176                   exception_edges << "  node" << node_id
    177                       << " -> node" << node_id_it->second << ":p" << handler_pc
    178                       << ";\n";
    179                 }
    180               }
    181               exception_targets.clear();
    182             }
    183 
    184             block_start_dex_pc = dex_pc;
    185 
    186             // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
    187             // like switch data.
    188             uint32_t old_last = last_node_id;
    189             last_node_id = it->second;
    190             if (old_last != std::numeric_limits<uint32_t>::max()) {
    191               regular_edges << "  node" << old_last << ":p" << old_dex_pc
    192                   << " -> node" << last_node_id << ":p" << dex_pc
    193                   << ";\n";
    194             }
    195           }
    196 
    197           // Look at the exceptions of the first entry.
    198           CatchHandlerIterator catch_it(*code_item, dex_pc);
    199           for (; catch_it.HasNext(); catch_it.Next()) {
    200             exception_targets.insert(catch_it.GetHandlerAddress());
    201           }
    202         }
    203 
    204         // Handle instruction.
    205 
    206         // Branch: something with at most two targets.
    207         if (inst->IsBranch()) {
    208           const int32_t offset = inst->GetTargetOffset();
    209           const bool conditional = !inst->IsUnconditional();
    210 
    211           auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
    212           if (target_it != dex_pc_to_node_id.end()) {
    213             taken_edges << "  node" << last_node_id << ":p" << dex_pc
    214                 << " -> node" << target_it->second << ":p" << (dex_pc + offset)
    215                 << ";\n";
    216           }
    217           if (!conditional) {
    218             // No fall-through.
    219             last_node_id = std::numeric_limits<uint32_t>::max();
    220           }
    221         } else if (inst->IsSwitch()) {
    222           // TODO: Iterate through all switch targets.
    223           const uint16_t* insns = code_item->insns_ + dex_pc;
    224           /* make sure the start of the switch is in range */
    225           int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
    226           /* offset to switch table is a relative branch-style offset */
    227           const uint16_t* switch_insns = insns + switch_offset;
    228           uint32_t switch_count = switch_insns[1];
    229           int32_t targets_offset;
    230           if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
    231             /* 0=sig, 1=count, 2/3=firstKey */
    232             targets_offset = 4;
    233           } else {
    234             /* 0=sig, 1=count, 2..count*2 = keys */
    235             targets_offset = 2 + 2 * switch_count;
    236           }
    237           /* make sure the end of the switch is in range */
    238           /* verify each switch target */
    239           for (uint32_t targ = 0; targ < switch_count; targ++) {
    240             int32_t offset =
    241                 static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
    242                 static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
    243             int32_t abs_offset = dex_pc + offset;
    244             auto target_it = dex_pc_to_node_id.find(abs_offset);
    245             if (target_it != dex_pc_to_node_id.end()) {
    246               // TODO: value label.
    247               taken_edges << "  node" << last_node_id << ":p" << dex_pc
    248                   << " -> node" << target_it->second << ":p" << (abs_offset)
    249                   << ";\n";
    250             }
    251           }
    252         }
    253 
    254         // Exception edges. If this is not the first instruction in the block
    255         if (block_start_dex_pc != dex_pc) {
    256           std::set<uint32_t> current_handler_pcs;
    257           CatchHandlerIterator catch_it(*code_item, dex_pc);
    258           for (; catch_it.HasNext(); catch_it.Next()) {
    259             current_handler_pcs.insert(catch_it.GetHandlerAddress());
    260           }
    261           if (current_handler_pcs != exception_targets) {
    262             exception_targets.clear();  // Clear so we don't do something at the end.
    263             blocks_with_detailed_exceptions.insert(block_start_dex_pc);
    264           }
    265         }
    266 
    267         if (inst->IsReturn() ||
    268             (inst->Opcode() == Instruction::THROW) ||
    269             (inst->IsBranch() && inst->IsUnconditional())) {
    270           // No fall-through.
    271           last_node_id = std::numeric_limits<uint32_t>::max();
    272         }
    273       }
    274       // Finish up the last block, if it had common exceptions.
    275       if (!exception_targets.empty()) {
    276         // It seems the last block had common exception handlers. Add the exception edges now.
    277         uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
    278         for (uint32_t handler_pc : exception_targets) {
    279           auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
    280           if (node_id_it != dex_pc_to_incl_id.end()) {
    281             exception_edges << "  node" << node_id
    282                 << " -> node" << node_id_it->second << ":p" << handler_pc
    283                 << ";\n";
    284           }
    285         }
    286         exception_targets.clear();
    287       }
    288     }
    289 
    290     // Second pass for detailed exception blocks.
    291     // TODO
    292     // Exception edges. If this is not the first instruction in the block
    293     for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
    294       const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
    295       uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
    296       while (true) {
    297         CatchHandlerIterator catch_it(*code_item, dex_pc);
    298         if (catch_it.HasNext()) {
    299           std::set<uint32_t> handled_targets;
    300           for (; catch_it.HasNext(); catch_it.Next()) {
    301             uint32_t handler_pc = catch_it.GetHandlerAddress();
    302             auto it = handled_targets.find(handler_pc);
    303             if (it == handled_targets.end()) {
    304               auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
    305               if (node_id_it != dex_pc_to_incl_id.end()) {
    306                 exception_edges << "  node" << this_node_id << ":p" << dex_pc
    307                     << " -> node" << node_id_it->second << ":p" << handler_pc
    308                     << ";\n";
    309               }
    310 
    311               // Mark as done.
    312               handled_targets.insert(handler_pc);
    313             }
    314           }
    315         }
    316         if (inst->IsBasicBlockEnd()) {
    317           break;
    318         }
    319 
    320         // Loop update. Have a break-out if the next instruction is a branch target and thus in
    321         // another block.
    322         dex_pc += inst->SizeInCodeUnits();
    323         if (dex_pc >= code_item->insns_size_in_code_units_) {
    324           break;
    325         }
    326         if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
    327           break;
    328         }
    329         inst = inst->Next();
    330       }
    331     }
    332 
    333     // Write out the sub-graphs to make edges styled.
    334     os << "\n";
    335     os << "  subgraph regular_edges {\n";
    336     os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
    337     os << "    " << regular_edges.str() << "\n";
    338     os << "  }\n\n";
    339 
    340     os << "  subgraph taken_edges {\n";
    341     os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
    342     os << "    " << taken_edges.str() << "\n";
    343     os << "  }\n\n";
    344 
    345     os << "  subgraph exception_edges {\n";
    346     os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
    347     os << "    " << exception_edges.str() << "\n";
    348     os << "  }\n\n";
    349   }
    350 
    351   os << "}\n";
    352 }
    353 
    354 void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
    355   // This is painful, we need to find the code item. That means finding the class, and then
    356   // iterating the table.
    357   if (dex_method_idx >= dex_file->NumMethodIds()) {
    358     os << "Could not find method-idx.";
    359     return;
    360   }
    361   const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
    362 
    363   const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
    364   if (class_def == nullptr) {
    365     os << "Could not find class-def.";
    366     return;
    367   }
    368 
    369   const uint8_t* class_data = dex_file->GetClassData(*class_def);
    370   if (class_data == nullptr) {
    371     os << "No class data.";
    372     return;
    373   }
    374 
    375   ClassDataItemIterator it(*dex_file, class_data);
    376   // Skip fields
    377   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
    378     it.Next();
    379   }
    380 
    381   // Find method, and dump it.
    382   while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
    383     uint32_t method_idx = it.GetMemberIndex();
    384     if (method_idx == dex_method_idx) {
    385       dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
    386       return;
    387     }
    388     it.Next();
    389   }
    390 
    391   // Otherwise complain.
    392   os << "Something went wrong, didn't find the method in the class data.";
    393 }
    394 
    395 }  // namespace art
    396