Home | History | Annotate | Download | only in courgette
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
      6 #define COURGETTE_ASSEMBLY_PROGRAM_H_
      7 
      8 #include <map>
      9 #include <set>
     10 #include <vector>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/memory/scoped_ptr.h"
     14 
     15 #include "courgette/disassembler.h"
     16 #include "courgette/memory_allocator.h"
     17 
     18 namespace courgette {
     19 
        // Forward declarations: this header only refers to these types through
        // pointers, so their full definitions are not needed here.
     20 class EncodedProgram;
     21 class Instruction;
     22 
        // Buffer of Instruction pointers; this is the type of AssemblyProgram's
        // instruction list.  NoThrowBuffer is not declared in this header --
        // presumably it comes from courgette/memory_allocator.h (confirm).
     23 typedef NoThrowBuffer<Instruction*> InstructionVector;
     24 
     25 // A Label is a symbolic reference to an address.  Unlike a conventional
     26 // assembly language, we always know the address.  The address will later be
     27 // stored in a table and the Label will be replaced with the index into the
     28 // table.
     29 //
     30 // TODO(sra): Make fields private and add setters and getters.
     31 class Label {
     32  public:
          // Value held by index_ while no table index has been assigned.
     33   static const int kNoIndex = -1;
          // Creates a label for address 0 with no index assigned and a zero count.
     34   Label() : rva_(0), index_(kNoIndex), count_(0) {}
          // Creates a label for address 'rva' with no index assigned and a zero
          // count.
     35   explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
     36 
     37   RVA rva_;    // Address referred to by the label.
     38   int index_;  // Index of address in address table, kNoIndex until assigned.
     39   int count_;  // Starts at 0.  NOTE(review): presumably the number of
                       // references to this label -- confirm against the .cc file.
     40 };
     41 
        // Lookup table from an address (RVA) to the Label associated with it.
     42 typedef std::map<RVA, Label*> RVAToLabel;
     43 
     44 // An AssemblyProgram is the result of disassembling an executable file.
     45 //
     46 // * The disassembler creates labels in the AssemblyProgram and emits
     47 //   'Instructions'.
     48 // * The disassembler then calls DefaultAssignIndexes to assign
     49 //   addresses to positions in the address tables.
     50 // * [Optional step]
     51 // * At this point the AssemblyProgram can be converted into an
     52 //   EncodedProgram and serialized to an output stream.
     53 // * Later, the EncodedProgram can be deserialized and assembled into
     54 //   the original file.
     55 //
     56 // The optional step is to modify the AssemblyProgram.  One form of modification
     57 // is to assign indexes in such a way as to make the EncodedProgram for this
     58 // AssemblyProgram look more like the EncodedProgram for some other
     59 // AssemblyProgram.  The modification process should call UnassignIndexes, do
     60 // its own assignment, and then call AssignRemainingIndexes to ensure all
     61 // indexes are assigned.
     62 //
     63 class AssemblyProgram {
     64  public:
          // 'kind' is the type of executable this program describes.  The
          // constructor body is not in this header; presumably it stores 'kind' in
          // kind_ (confirm in the .cc file).
     65   explicit AssemblyProgram(ExecutableType kind);
     66   ~AssemblyProgram();
     67 
          // Returns the executable type held in kind_.
     68   ExecutableType kind() const { return kind_; }
     69 
          // Stores 'image_base' in image_base_.
     70   void set_image_base(uint64 image_base) { image_base_ = image_base; }
     71 
     72   // Instructions will be assembled in the order they are emitted.
          // Every Emit* method below returns a CheckBool and is marked
          // WARN_UNUSED_RESULT, so callers are expected to examine the result.
          // NOTE(review): CheckBool presumably reports success or failure -- see
          // its definition, which is not in this header.
     73 
     74   // Generates an entire base relocation table.
     75   CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
     76 
     77   // Generates an ELF style relocation table for X86.
     78   CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
     79 
     80   // Generates an ELF style relocation table for ARM.
     81   CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT;
     82 
     83   // Following instruction will be assembled at address 'rva'.
     84   CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
     85 
     86   // Generates a single byte of data or machine instruction.
     87   CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
     88 
     89   // Generates multiple bytes of data or machine instructions.
     90   CheckBool EmitBytesInstruction(const uint8* value, uint32 len)
     91       WARN_UNUSED_RESULT;
     92 
     93   // Generates 4-byte relative reference to address of 'label'.
     94   CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
     95 
     96   // Generates 4-byte relative reference to address of 'label' for
     97   // ARM.
          // NOTE(review): the meaning of 'op', 'arm_op' and 'op_size' is not
          // documented in this header -- see the implementation.
     98   CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
     99                          uint16 op_size) WARN_UNUSED_RESULT;
    100 
    101   // Generates 4-byte absolute reference to address of 'label'.
    102   CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
    103 
    104   // Looks up a label or creates a new one.  Might return NULL.
    105   Label* FindOrMakeAbs32Label(RVA rva);
    106 
    107   // Looks up a label or creates a new one.  Might return NULL.
    108   Label* FindOrMakeRel32Label(RVA rva);
    109 
          // Label index management.  The class comment describes the intended
          // sequence: DefaultAssignIndexes after disassembly, or UnassignIndexes
          // followed by a custom assignment and then AssignRemainingIndexes.
    110   void DefaultAssignIndexes();
    111   void UnassignIndexes();
    112   void AssignRemainingIndexes();
    113 
          // Converts this program into an EncodedProgram.
          // NOTE(review): ownership of the returned pointer, and whether NULL
          // signals failure, are not visible in this header -- confirm.
    114   EncodedProgram* Encode() const;
    115 
    116   // Accessor for instruction list.
    117   const InstructionVector& instructions() const {
    118     return instructions_;
    119   }
    120 
    121   // Returns the label if the instruction contains an absolute address,
    122   // otherwise returns NULL.
    123   Label* InstructionAbs32Label(const Instruction* instruction) const;
    124 
    125   // Returns the label if the instruction contains a rel32 offset,
    126   // otherwise returns NULL.
    127   Label* InstructionRel32Label(const Instruction* instruction) const;
    128 
    129   // Trim underused labels.
          // NOTE(review): unlike the other CheckBool-returning methods this one is
          // not marked WARN_UNUSED_RESULT -- confirm whether that is intentional.
    130   CheckBool TrimLabels();
    131 
          // NOTE(review): the next two methods are undocumented; their behavior is
          // defined in the .cc file.
    132   void PrintLabelCounts(RVAToLabel* labels);
    133   void CountRel32ARM();
    134 
    135  private:
          // Executable type; returned by kind().
    136   ExecutableType kind_;
    137 
          // NOTE(review): presumably the common helper behind the public Emit*
          // methods -- confirm in the .cc file.
    138   CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
    139 
          // Defined out of line.  NOTE(review): presumably the threshold used by
          // TrimLabels -- confirm.
    140   static const int kLabelLowerLimit;
    141 
    142   // Looks up a label or creates a new one.  Might return NULL.
    143   Label* FindLabel(RVA rva, RVAToLabel* labels);
    144 
    145   // Helper methods for the public versions.
    146   static void UnassignIndexes(RVAToLabel* labels);
    147   static void DefaultAssignIndexes(RVAToLabel* labels);
    148   static void AssignRemainingIndexes(RVAToLabel* labels);
    149 
    150   // Sharing instructions that emit a single byte saves a lot of space.
    151   Instruction* GetByteInstruction(uint8 byte);
          // Array of Instruction pointers backing the sharing described above.
          // NOTE(review): presumably indexed by byte value -- confirm.
    152   scoped_ptr<Instruction*[]> byte_instruction_cache_;
    153 
    154   uint64 image_base_;  // Desired or mandated base address of image.
    155 
    156   InstructionVector instructions_;  // All the instructions in program.
    157 
    158   // These are lookup maps to find the label associated with a given address.
    159   // We have separate label spaces for addresses referenced by rel32 labels and
    160   // abs32 labels.  This is somewhat arbitrary.
    161   RVAToLabel rel32_labels_;
    162   RVAToLabel abs32_labels_;
    163 
    164   DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
    165 };
    166 
    167 }  // namespace courgette
    168 #endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
    169