Home | History | Annotate | Download | only in courgette
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "courgette/assembly_program.h"
      6 
      7 #include <memory.h>
      8 #include <algorithm>
      9 #include <map>
     10 #include <set>
     11 #include <sstream>
     12 #include <vector>
     13 
     14 #include "base/logging.h"
     15 #include "base/memory/scoped_ptr.h"
     16 
     17 #include "courgette/courgette.h"
     18 #include "courgette/encoded_program.h"
     19 
     20 namespace courgette {
     21 
     22 // Opcodes of simple assembly language
     23 enum OP {
     24   ORIGIN,         // ORIGIN <rva> - set current address for assembly.
     25   MAKEPERELOCS,   // Generates a base relocation table.
     26   MAKEELFRELOCS,  // Generates a base relocation table.
     27   DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
     28   REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
     29   ABS32,          // REL32 <label> - emit am abs32 encoded reference to 'label'.
     30   REL32ARM,       // REL32ARM <c_op> <label> - arm-specific rel32 reference
     31   MAKEELFARMRELOCS, // Generates a base relocation table.
     32   DEFBYTES,       // Emits any number of byte literals
     33   LAST_OP
     34 };
     35 
     36 // Base class for instructions.  Because we have so many instructions we want to
     37 // keep them as small as possible.  For this reason we avoid virtual functions.
     38 //
     39 class Instruction {
     40  public:
     41   OP op() const { return static_cast<OP>(op_); }
     42 
     43  protected:
     44   explicit Instruction(OP op) : op_(op), info_(0) {}
     45   Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
     46 
     47   uint32 op_   : 4;    // A few bits to store the OP code.
     48   uint32 info_ : 28;   // Remaining bits in first word available to subclass.
     49 
     50  private:
     51   DISALLOW_COPY_AND_ASSIGN(Instruction);
     52 };
     53 
     54 namespace {
     55 
     56 // Sets the current address for the emitting instructions.
     57 class OriginInstruction : public Instruction {
     58  public:
     59   explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
     60   RVA origin_rva() const { return rva_; }
     61  private:
     62   RVA  rva_;
     63 };
     64 
     65 // Emits an entire PE base relocation table.
     66 class PeRelocsInstruction : public Instruction {
     67  public:
     68   PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
     69 };
     70 
     71 // Emits an ELF relocation table.
     72 class ElfRelocsInstruction : public Instruction {
     73  public:
     74   ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
     75 };
     76 
     77 // Emits an ELF ARM relocation table.
     78 class ElfARMRelocsInstruction : public Instruction {
     79  public:
     80   ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
     81 };
     82 
     83 // Emits a single byte.
     84 class ByteInstruction : public Instruction {
     85  public:
     86   explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
     87   uint8 byte_value() const { return info_; }
     88 };
     89 
     90 // Emits a single byte.
     91 class BytesInstruction : public Instruction {
     92  public:
     93   BytesInstruction(const uint8* values, uint32 len)
     94       : Instruction(DEFBYTES, 0),
     95         values_(values),
     96         len_(len) {}
     97   const uint8* byte_values() const { return values_; }
     98   uint32 len() const { return len_; }
     99 
    100  private:
    101   const uint8* values_;
    102   uint32 len_;
    103 };
    104 
    105 // A ABS32 to REL32 instruction emits a reference to a label's address.
    106 class InstructionWithLabel : public Instruction {
    107  public:
    108   InstructionWithLabel(OP op, Label* label)
    109     : Instruction(op, 0), label_(label) {
    110     if (label == NULL) NOTREACHED();
    111   }
    112   Label* label() const { return label_; }
    113  protected:
    114   Label* label_;
    115 };
    116 
    117 // An ARM REL32 instruction emits a reference to a label's address and
    118 // a specially-compressed ARM op.
    119 class InstructionWithLabelARM : public InstructionWithLabel {
    120  public:
    121   InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
    122                           const uint8* arm_op, uint16 op_size)
    123     : InstructionWithLabel(op, label), compressed_op_(compressed_op),
    124       arm_op_(arm_op), op_size_(op_size) {
    125     if (label == NULL) NOTREACHED();
    126   }
    127   uint16 compressed_op() const { return compressed_op_; }
    128   const uint8* arm_op() const { return arm_op_; }
    129   uint16 op_size() const { return op_size_; }
    130  private:
    131   uint16 compressed_op_;
    132   const uint8* arm_op_;
    133   uint16 op_size_;
    134 };
    135 
    136 }  // namespace
    137 
    138 AssemblyProgram::AssemblyProgram(ExecutableType kind)
    139   : kind_(kind), image_base_(0) {
    140 }
    141 
    142 static void DeleteContainedLabels(const RVAToLabel& labels) {
    143   for (RVAToLabel::const_iterator p = labels.begin();  p != labels.end();  ++p)
    144     delete p->second;
    145 }
    146 
    147 AssemblyProgram::~AssemblyProgram() {
    148   for (size_t i = 0;  i < instructions_.size();  ++i) {
    149     Instruction* instruction = instructions_[i];
    150     if (instruction->op() != DEFBYTE)  // Will be in byte_instruction_cache_.
    151       delete instruction;
    152   }
    153   if (byte_instruction_cache_.get()) {
    154     for (size_t i = 0;  i < 256;  ++i)
    155       delete byte_instruction_cache_[i];
    156   }
    157   DeleteContainedLabels(rel32_labels_);
    158   DeleteContainedLabels(abs32_labels_);
    159 }
    160 
    161 CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
    162   return Emit(new(std::nothrow) PeRelocsInstruction());
    163 }
    164 
    165 CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
    166   return Emit(new(std::nothrow) ElfRelocsInstruction());
    167 }
    168 
    169 CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
    170   return Emit(new(std::nothrow) ElfARMRelocsInstruction());
    171 }
    172 
    173 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
    174   return Emit(new(std::nothrow) OriginInstruction(rva));
    175 }
    176 
    177 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
    178   return Emit(GetByteInstruction(byte));
    179 }
    180 
    181 CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
    182                                                 uint32 len) {
    183   return Emit(new(std::nothrow) BytesInstruction(values, len));
    184 }
    185 
    186 CheckBool AssemblyProgram::EmitRel32(Label* label) {
    187   return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
    188 }
    189 
    190 CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
    191                                         const uint8* arm_op, uint16 op_size) {
    192   return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
    193                                                         arm_op, op_size));
    194 }
    195 
    196 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
    197   return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
    198 }
    199 
    200 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
    201   return FindLabel(rva, &abs32_labels_);
    202 }
    203 
    204 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
    205   return FindLabel(rva, &rel32_labels_);
    206 }
    207 
    208 void AssemblyProgram::DefaultAssignIndexes() {
    209   DefaultAssignIndexes(&abs32_labels_);
    210   DefaultAssignIndexes(&rel32_labels_);
    211 }
    212 
    213 void AssemblyProgram::UnassignIndexes() {
    214   UnassignIndexes(&abs32_labels_);
    215   UnassignIndexes(&rel32_labels_);
    216 }
    217 
    218 void AssemblyProgram::AssignRemainingIndexes() {
    219   AssignRemainingIndexes(&abs32_labels_);
    220   AssignRemainingIndexes(&rel32_labels_);
    221 }
    222 
    223 Label* AssemblyProgram::InstructionAbs32Label(
    224     const Instruction* instruction) const {
    225   if (instruction->op() == ABS32)
    226     return static_cast<const InstructionWithLabel*>(instruction)->label();
    227   return NULL;
    228 }
    229 
    230 Label* AssemblyProgram::InstructionRel32Label(
    231     const Instruction* instruction) const {
    232   if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
    233     Label* label =
    234         static_cast<const InstructionWithLabel*>(instruction)->label();
    235     return label;
    236   }
    237   return NULL;
    238 }
    239 
    240 CheckBool AssemblyProgram::Emit(Instruction* instruction) {
    241   if (!instruction)
    242     return false;
    243   bool ok = instructions_.push_back(instruction);
    244   if (!ok)
    245     delete instruction;
    246   return ok;
    247 }
    248 
    249 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
    250   Label*& slot = (*labels)[rva];
    251   if (slot == NULL) {
    252     slot = new(std::nothrow) Label(rva);
    253   }
    254   slot->count_++;
    255   return slot;
    256 }
    257 
    258 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
    259   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    260     Label* current = p->second;
    261     current->index_ = Label::kNoIndex;
    262   }
    263 }
    264 
    265 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
    266 // address order.
    267 //
    268 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
    269   int index = 0;
    270   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    271     Label* current = p->second;
    272     if (current->index_ != Label::kNoIndex)
    273       NOTREACHED();
    274     current->index_ = index;
    275     ++index;
    276   }
    277 }
    278 
    279 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
    280 // yet assigned an index.
    281 //
    282 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
    283   // An address table compresses best when each index is associated with an
    284   // address that is slight larger than the previous index.
    285 
    286   // First see which indexes have not been used.  The 'available' vector could
    287   // grow even bigger, but the number of addresses is a better starting size
    288   // than empty.
    289   std::vector<bool> available(labels->size(), true);
    290   int used = 0;
    291 
    292   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    293     int index = p->second->index_;
    294     if (index != Label::kNoIndex) {
    295       while (static_cast<size_t>(index) >= available.size())
    296         available.push_back(true);
    297       available.at(index) = false;
    298       ++used;
    299     }
    300   }
    301 
    302   VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
    303 
    304   // Are there any unused labels that happen to be adjacent following a used
    305   // label?
    306   //
    307   int fill_forward_count = 0;
    308   Label* prev = 0;
    309   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    310     Label* current = p->second;
    311     if (current->index_ == Label::kNoIndex) {
    312       int index = 0;
    313       if (prev  &&  prev->index_ != Label::kNoIndex)
    314         index = prev->index_ + 1;
    315       if (index < static_cast<int>(available.size()) && available.at(index)) {
    316         current->index_ = index;
    317         available.at(index) = false;
    318         ++fill_forward_count;
    319       }
    320     }
    321     prev = current;
    322   }
    323 
    324   // Are there any unused labels that happen to be adjacent preceeding a used
    325   // label?
    326   //
    327   int fill_backward_count = 0;
    328   prev = 0;
    329   for (RVAToLabel::reverse_iterator p = labels->rbegin();
    330        p != labels->rend();
    331        ++p) {
    332     Label* current = p->second;
    333     if (current->index_ == Label::kNoIndex) {
    334       int prev_index;
    335       if (prev)
    336         prev_index = prev->index_;
    337       else
    338         prev_index = static_cast<uint32>(available.size());
    339       if (prev_index != 0  &&
    340           prev_index != Label::kNoIndex  &&
    341           available.at(prev_index - 1)) {
    342         current->index_ = prev_index - 1;
    343         available.at(current->index_) = false;
    344         ++fill_backward_count;
    345       }
    346     }
    347     prev = current;
    348   }
    349 
    350   // Fill in any remaining indexes
    351   int fill_infill_count = 0;
    352   int index = 0;
    353   for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    354     Label* current = p->second;
    355     if (current->index_ == Label::kNoIndex) {
    356       while (!available.at(index)) {
    357         ++index;
    358       }
    359       current->index_ = index;
    360       available.at(index) = false;
    361       ++index;
    362       ++fill_infill_count;
    363     }
    364   }
    365 
    366   VLOG(1) << "  fill forward " << fill_forward_count
    367           << "  backward " << fill_backward_count
    368           << "  infill " << fill_infill_count;
    369 }
    370 
    371 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
    372 
    373 #if defined(OS_WIN)
    374 __declspec(noinline)
    375 #endif
    376 static CheckBool DefineLabels(const RVAToLabel& labels,
    377                               EncodedProgram* encoded_format,
    378                               DefineLabelMethod define_label) {
    379   bool ok = true;
    380   for (RVAToLabel::const_iterator p = labels.begin();
    381        ok && p != labels.end();
    382        ++p) {
    383     Label* label = p->second;
    384     ok = (encoded_format->*define_label)(label->index_, label->rva_);
    385   }
    386   return ok;
    387 }
    388 
    389 EncodedProgram* AssemblyProgram::Encode() const {
    390   scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
    391   if (!encoded.get())
    392     return NULL;
    393 
    394   encoded->set_image_base(image_base_);
    395 
    396   if (!DefineLabels(abs32_labels_, encoded.get(),
    397                     &EncodedProgram::DefineAbs32Label) ||
    398       !DefineLabels(rel32_labels_, encoded.get(),
    399                     &EncodedProgram::DefineRel32Label)) {
    400     return NULL;
    401   }
    402 
    403   encoded->EndLabels();
    404 
    405   for (size_t i = 0;  i < instructions_.size();  ++i) {
    406     Instruction* instruction = instructions_[i];
    407 
    408     switch (instruction->op()) {
    409       case ORIGIN: {
    410         OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
    411         if (!encoded->AddOrigin(org->origin_rva()))
    412           return NULL;
    413         break;
    414       }
    415       case DEFBYTE: {
    416         uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
    417         if (!encoded->AddCopy(1, &b))
    418           return NULL;
    419         break;
    420       }
    421       case DEFBYTES: {
    422         const uint8* byte_values =
    423           static_cast<BytesInstruction*>(instruction)->byte_values();
    424         uint32 len = static_cast<BytesInstruction*>(instruction)->len();
    425 
    426         if (!encoded->AddCopy(len, byte_values))
    427           return NULL;
    428         break;
    429       }
    430       case REL32: {
    431         Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
    432         if (!encoded->AddRel32(label->index_))
    433           return NULL;
    434         break;
    435       }
    436       case REL32ARM: {
    437         Label* label =
    438             static_cast<InstructionWithLabelARM*>(instruction)->label();
    439         uint16 compressed_op =
    440           static_cast<InstructionWithLabelARM*>(instruction)->
    441           compressed_op();
    442         if (!encoded->AddRel32ARM(compressed_op, label->index_))
    443           return NULL;
    444         break;
    445       }
    446       case ABS32: {
    447         Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
    448         if (!encoded->AddAbs32(label->index_))
    449           return NULL;
    450         break;
    451       }
    452       case MAKEPERELOCS: {
    453         if (!encoded->AddPeMakeRelocs())
    454           return NULL;
    455         break;
    456       }
    457       case MAKEELFRELOCS: {
    458         if (!encoded->AddElfMakeRelocs())
    459           return NULL;
    460         break;
    461       }
    462       case MAKEELFARMRELOCS: {
    463         if (!encoded->AddElfARMMakeRelocs())
    464           return NULL;
    465         break;
    466       }
    467       default: {
    468         NOTREACHED() << "Unknown Insn OP kind";
    469       }
    470     }
    471   }
    472 
    473   return encoded.release();
    474 }
    475 
    476 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
    477   if (!byte_instruction_cache_.get()) {
    478     byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
    479     if (!byte_instruction_cache_.get())
    480       return NULL;
    481 
    482     for (int i = 0; i < 256; ++i) {
    483       byte_instruction_cache_[i] =
    484           new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
    485       if (!byte_instruction_cache_[i]) {
    486         for (int j = 0; j < i; ++j)
    487           delete byte_instruction_cache_[j];
    488         byte_instruction_cache_.reset();
    489         return NULL;
    490       }
    491     }
    492   }
    493 
    494   return byte_instruction_cache_[byte];
    495 }
    496 
    497 // Chosen empirically to give the best reduction in payload size for
    498 // an update from daisy_3701.98.0 to daisy_4206.0.0.
    499 const int AssemblyProgram::kLabelLowerLimit = 5;
    500 
    501 CheckBool AssemblyProgram::TrimLabels() {
    502   // For now only trim for ARM binaries
    503   if (kind() != EXE_ELF_32_ARM)
    504     return true;
    505 
    506   int lower_limit = kLabelLowerLimit;
    507 
    508   VLOG(1) << "TrimLabels: threshold " << lower_limit;
    509 
    510   // Remove underused labels from the list of labels
    511   RVAToLabel::iterator it = rel32_labels_.begin();
    512   while (it != rel32_labels_.end()) {
    513     if (it->second->count_ <= lower_limit) {
    514       rel32_labels_.erase(it++);
    515     } else {
    516       ++it;
    517     }
    518   }
    519 
    520   // Walk through the list of instructions, replacing trimmed labels
    521   // with the original machine instruction
    522   for (size_t i = 0; i < instructions_.size(); ++i) {
    523     Instruction* instruction = instructions_[i];
    524     switch (instruction->op()) {
    525       case REL32ARM: {
    526         Label* label =
    527             static_cast<InstructionWithLabelARM*>(instruction)->label();
    528         if (label->count_ <= lower_limit) {
    529           const uint8* arm_op =
    530               static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
    531           uint16 op_size =
    532               static_cast<InstructionWithLabelARM*>(instruction)->op_size();
    533 
    534           if (op_size < 1)
    535             return false;
    536           BytesInstruction* new_instruction =
    537             new(std::nothrow) BytesInstruction(arm_op, op_size);
    538           instructions_[i] = new_instruction;
    539         }
    540         break;
    541       }
    542       default:
    543         break;
    544     }
    545   }
    546 
    547   return true;
    548 }
    549 
    550 void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
    551   for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
    552        ++p) {
    553     Label* current = p->second;
    554     if (current->index_ != Label::kNoIndex)
    555       printf("%d\n", current->count_);
    556   }
    557 }
    558 
    559 void AssemblyProgram::CountRel32ARM() {
    560   PrintLabelCounts(&rel32_labels_);
    561 }
    562 
    563 ////////////////////////////////////////////////////////////////////////////////
    564 
    565 Status TrimLabels(AssemblyProgram* program) {
    566   if (program->TrimLabels())
    567     return C_OK;
    568   else
    569     return C_TRIM_FAILED;
    570 }
    571 
    572 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
    573   *output = NULL;
    574   EncodedProgram *encoded = program->Encode();
    575   if (encoded) {
    576     *output = encoded;
    577     return C_OK;
    578   } else {
    579     return C_GENERAL_ERROR;
    580   }
    581 }
    582 
    583 }  // namespace courgette
    584