1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "courgette/assembly_program.h" 6 7 #include <memory.h> 8 #include <algorithm> 9 #include <map> 10 #include <set> 11 #include <sstream> 12 #include <vector> 13 14 #include "base/logging.h" 15 #include "base/memory/scoped_ptr.h" 16 17 #include "courgette/courgette.h" 18 #include "courgette/encoded_program.h" 19 20 namespace courgette { 21 22 // Opcodes of simple assembly language 23 enum OP { 24 ORIGIN, // ORIGIN <rva> - set current address for assembly. 25 MAKEPERELOCS, // Generates a base relocation table. 26 MAKEELFRELOCS, // Generates a base relocation table. 27 DEFBYTE, // DEFBYTE <value> - emit a byte literal. 28 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'. 29 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'. 30 REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference 31 MAKEELFARMRELOCS, // Generates a base relocation table. 32 DEFBYTES, // Emits any number of byte literals 33 LAST_OP 34 }; 35 36 // Base class for instructions. Because we have so many instructions we want to 37 // keep them as small as possible. For this reason we avoid virtual functions. 38 // 39 class Instruction { 40 public: 41 OP op() const { return static_cast<OP>(op_); } 42 43 protected: 44 explicit Instruction(OP op) : op_(op), info_(0) {} 45 Instruction(OP op, unsigned int info) : op_(op), info_(info) {} 46 47 uint32 op_ : 4; // A few bits to store the OP code. 48 uint32 info_ : 28; // Remaining bits in first word available to subclass. 49 50 private: 51 DISALLOW_COPY_AND_ASSIGN(Instruction); 52 }; 53 54 namespace { 55 56 // Sets the current address for the emitting instructions. 57 class OriginInstruction : public Instruction { 58 public: 59 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {} 60 RVA origin_rva() const { return rva_; } 61 private: 62 RVA rva_; 63 }; 64 65 // Emits an entire PE base relocation table. 66 class PeRelocsInstruction : public Instruction { 67 public: 68 PeRelocsInstruction() : Instruction(MAKEPERELOCS) {} 69 }; 70 71 // Emits an ELF relocation table. 72 class ElfRelocsInstruction : public Instruction { 73 public: 74 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {} 75 }; 76 77 // Emits an ELF ARM relocation table. 78 class ElfARMRelocsInstruction : public Instruction { 79 public: 80 ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {} 81 }; 82 83 // Emits a single byte. 84 class ByteInstruction : public Instruction { 85 public: 86 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {} 87 uint8 byte_value() const { return info_; } 88 }; 89 90 // Emits a single byte. 91 class BytesInstruction : public Instruction { 92 public: 93 BytesInstruction(const uint8* values, uint32 len) 94 : Instruction(DEFBYTES, 0), 95 values_(values), 96 len_(len) {} 97 const uint8* byte_values() const { return values_; } 98 uint32 len() const { return len_; } 99 100 private: 101 const uint8* values_; 102 uint32 len_; 103 }; 104 105 // A ABS32 to REL32 instruction emits a reference to a label's address. 106 class InstructionWithLabel : public Instruction { 107 public: 108 InstructionWithLabel(OP op, Label* label) 109 : Instruction(op, 0), label_(label) { 110 if (label == NULL) NOTREACHED(); 111 } 112 Label* label() const { return label_; } 113 protected: 114 Label* label_; 115 }; 116 117 // An ARM REL32 instruction emits a reference to a label's address and 118 // a specially-compressed ARM op. 119 class InstructionWithLabelARM : public InstructionWithLabel { 120 public: 121 InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label, 122 const uint8* arm_op, uint16 op_size) 123 : InstructionWithLabel(op, label), compressed_op_(compressed_op), 124 arm_op_(arm_op), op_size_(op_size) { 125 if (label == NULL) NOTREACHED(); 126 } 127 uint16 compressed_op() const { return compressed_op_; } 128 const uint8* arm_op() const { return arm_op_; } 129 uint16 op_size() const { return op_size_; } 130 private: 131 uint16 compressed_op_; 132 const uint8* arm_op_; 133 uint16 op_size_; 134 }; 135 136 } // namespace 137 138 AssemblyProgram::AssemblyProgram(ExecutableType kind) 139 : kind_(kind), image_base_(0) { 140 } 141 142 static void DeleteContainedLabels(const RVAToLabel& labels) { 143 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) 144 delete p->second; 145 } 146 147 AssemblyProgram::~AssemblyProgram() { 148 for (size_t i = 0; i < instructions_.size(); ++i) { 149 Instruction* instruction = instructions_[i]; 150 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_. 151 delete instruction; 152 } 153 if (byte_instruction_cache_.get()) { 154 for (size_t i = 0; i < 256; ++i) 155 delete byte_instruction_cache_[i]; 156 } 157 DeleteContainedLabels(rel32_labels_); 158 DeleteContainedLabels(abs32_labels_); 159 } 160 161 CheckBool AssemblyProgram::EmitPeRelocsInstruction() { 162 return Emit(new(std::nothrow) PeRelocsInstruction()); 163 } 164 165 CheckBool AssemblyProgram::EmitElfRelocationInstruction() { 166 return Emit(new(std::nothrow) ElfRelocsInstruction()); 167 } 168 169 CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() { 170 return Emit(new(std::nothrow) ElfARMRelocsInstruction()); 171 } 172 173 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) { 174 return Emit(new(std::nothrow) OriginInstruction(rva)); 175 } 176 177 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) { 178 return Emit(GetByteInstruction(byte)); 179 } 180 181 CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values, 182 uint32 len) { 183 return Emit(new(std::nothrow) BytesInstruction(values, len)); 184 } 185 186 CheckBool AssemblyProgram::EmitRel32(Label* label) { 187 return Emit(new(std::nothrow) InstructionWithLabel(REL32, label)); 188 } 189 190 CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label, 191 const uint8* arm_op, uint16 op_size) { 192 return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label, 193 arm_op, op_size)); 194 } 195 196 CheckBool AssemblyProgram::EmitAbs32(Label* label) { 197 return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label)); 198 } 199 200 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) { 201 return FindLabel(rva, &abs32_labels_); 202 } 203 204 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) { 205 return FindLabel(rva, &rel32_labels_); 206 } 207 208 void AssemblyProgram::DefaultAssignIndexes() { 209 DefaultAssignIndexes(&abs32_labels_); 210 DefaultAssignIndexes(&rel32_labels_); 211 } 212 213 void AssemblyProgram::UnassignIndexes() { 214 UnassignIndexes(&abs32_labels_); 215 UnassignIndexes(&rel32_labels_); 216 } 217 218 void AssemblyProgram::AssignRemainingIndexes() { 219 AssignRemainingIndexes(&abs32_labels_); 220 AssignRemainingIndexes(&rel32_labels_); 221 } 222 223 Label* AssemblyProgram::InstructionAbs32Label( 224 const Instruction* instruction) const { 225 if (instruction->op() == ABS32) 226 return static_cast<const InstructionWithLabel*>(instruction)->label(); 227 return NULL; 228 } 229 230 Label* AssemblyProgram::InstructionRel32Label( 231 const Instruction* instruction) const { 232 if (instruction->op() == REL32 || instruction->op() == REL32ARM) { 233 Label* label = 234 static_cast<const InstructionWithLabel*>(instruction)->label(); 235 return label; 236 } 237 return NULL; 238 } 239 240 CheckBool AssemblyProgram::Emit(Instruction* instruction) { 241 if (!instruction) 242 return false; 243 bool ok = instructions_.push_back(instruction); 244 if (!ok) 245 delete instruction; 246 return ok; 247 } 248 249 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) { 250 Label*& slot = (*labels)[rva]; 251 if (slot == NULL) { 252 slot = new(std::nothrow) Label(rva); 253 } 254 slot->count_++; 255 return slot; 256 } 257 258 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) { 259 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { 260 Label* current = p->second; 261 current->index_ = Label::kNoIndex; 262 } 263 } 264 265 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing 266 // address order. 267 // 268 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) { 269 int index = 0; 270 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { 271 Label* current = p->second; 272 if (current->index_ != Label::kNoIndex) 273 NOTREACHED(); 274 current->index_ = index; 275 ++index; 276 } 277 } 278 279 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not 280 // yet assigned an index. 281 // 282 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) { 283 // An address table compresses best when each index is associated with an 284 // address that is slight larger than the previous index. 285 286 // First see which indexes have not been used. The 'available' vector could 287 // grow even bigger, but the number of addresses is a better starting size 288 // than empty. 289 std::vector<bool> available(labels->size(), true); 290 int used = 0; 291 292 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { 293 int index = p->second->index_; 294 if (index != Label::kNoIndex) { 295 while (static_cast<size_t>(index) >= available.size()) 296 available.push_back(true); 297 available.at(index) = false; 298 ++used; 299 } 300 } 301 302 VLOG(1) << used << " of " << labels->size() << " labels pre-assigned"; 303 304 // Are there any unused labels that happen to be adjacent following a used 305 // label? 306 // 307 int fill_forward_count = 0; 308 Label* prev = 0; 309 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { 310 Label* current = p->second; 311 if (current->index_ == Label::kNoIndex) { 312 int index = 0; 313 if (prev && prev->index_ != Label::kNoIndex) 314 index = prev->index_ + 1; 315 if (index < static_cast<int>(available.size()) && available.at(index)) { 316 current->index_ = index; 317 available.at(index) = false; 318 ++fill_forward_count; 319 } 320 } 321 prev = current; 322 } 323 324 // Are there any unused labels that happen to be adjacent preceeding a used 325 // label? 326 // 327 int fill_backward_count = 0; 328 prev = 0; 329 for (RVAToLabel::reverse_iterator p = labels->rbegin(); 330 p != labels->rend(); 331 ++p) { 332 Label* current = p->second; 333 if (current->index_ == Label::kNoIndex) { 334 int prev_index; 335 if (prev) 336 prev_index = prev->index_; 337 else 338 prev_index = static_cast<uint32>(available.size()); 339 if (prev_index != 0 && 340 prev_index != Label::kNoIndex && 341 available.at(prev_index - 1)) { 342 current->index_ = prev_index - 1; 343 available.at(current->index_) = false; 344 ++fill_backward_count; 345 } 346 } 347 prev = current; 348 } 349 350 // Fill in any remaining indexes 351 int fill_infill_count = 0; 352 int index = 0; 353 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) { 354 Label* current = p->second; 355 if (current->index_ == Label::kNoIndex) { 356 while (!available.at(index)) { 357 ++index; 358 } 359 current->index_ = index; 360 available.at(index) = false; 361 ++index; 362 ++fill_infill_count; 363 } 364 } 365 366 VLOG(1) << " fill forward " << fill_forward_count 367 << " backward " << fill_backward_count 368 << " infill " << fill_infill_count; 369 } 370 371 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value); 372 373 #if defined(OS_WIN) 374 __declspec(noinline) 375 #endif 376 static CheckBool DefineLabels(const RVAToLabel& labels, 377 EncodedProgram* encoded_format, 378 DefineLabelMethod define_label) { 379 bool ok = true; 380 for (RVAToLabel::const_iterator p = labels.begin(); 381 ok && p != labels.end(); 382 ++p) { 383 Label* label = p->second; 384 ok = (encoded_format->*define_label)(label->index_, label->rva_); 385 } 386 return ok; 387 } 388 389 EncodedProgram* AssemblyProgram::Encode() const { 390 scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram()); 391 if (!encoded.get()) 392 return NULL; 393 394 encoded->set_image_base(image_base_); 395 396 if (!DefineLabels(abs32_labels_, encoded.get(), 397 &EncodedProgram::DefineAbs32Label) || 398 !DefineLabels(rel32_labels_, encoded.get(), 399 &EncodedProgram::DefineRel32Label)) { 400 return NULL; 401 } 402 403 encoded->EndLabels(); 404 405 for (size_t i = 0; i < instructions_.size(); ++i) { 406 Instruction* instruction = instructions_[i]; 407 408 switch (instruction->op()) { 409 case ORIGIN: { 410 OriginInstruction* org = static_cast<OriginInstruction*>(instruction); 411 if (!encoded->AddOrigin(org->origin_rva())) 412 return NULL; 413 break; 414 } 415 case DEFBYTE: { 416 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value(); 417 if (!encoded->AddCopy(1, &b)) 418 return NULL; 419 break; 420 } 421 case DEFBYTES: { 422 const uint8* byte_values = 423 static_cast<BytesInstruction*>(instruction)->byte_values(); 424 uint32 len = static_cast<BytesInstruction*>(instruction)->len(); 425 426 if (!encoded->AddCopy(len, byte_values)) 427 return NULL; 428 break; 429 } 430 case REL32: { 431 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); 432 if (!encoded->AddRel32(label->index_)) 433 return NULL; 434 break; 435 } 436 case REL32ARM: { 437 Label* label = 438 static_cast<InstructionWithLabelARM*>(instruction)->label(); 439 uint16 compressed_op = 440 static_cast<InstructionWithLabelARM*>(instruction)-> 441 compressed_op(); 442 if (!encoded->AddRel32ARM(compressed_op, label->index_)) 443 return NULL; 444 break; 445 } 446 case ABS32: { 447 Label* label = static_cast<InstructionWithLabel*>(instruction)->label(); 448 if (!encoded->AddAbs32(label->index_)) 449 return NULL; 450 break; 451 } 452 case MAKEPERELOCS: { 453 if (!encoded->AddPeMakeRelocs(kind_)) 454 return NULL; 455 break; 456 } 457 case MAKEELFRELOCS: { 458 if (!encoded->AddElfMakeRelocs()) 459 return NULL; 460 break; 461 } 462 case MAKEELFARMRELOCS: { 463 if (!encoded->AddElfARMMakeRelocs()) 464 return NULL; 465 break; 466 } 467 default: { 468 NOTREACHED() << "Unknown Insn OP kind"; 469 } 470 } 471 } 472 473 return encoded.release(); 474 } 475 476 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) { 477 if (!byte_instruction_cache_.get()) { 478 byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]); 479 if (!byte_instruction_cache_.get()) 480 return NULL; 481 482 for (int i = 0; i < 256; ++i) { 483 byte_instruction_cache_[i] = 484 new(std::nothrow) ByteInstruction(static_cast<uint8>(i)); 485 if (!byte_instruction_cache_[i]) { 486 for (int j = 0; j < i; ++j) 487 delete byte_instruction_cache_[j]; 488 byte_instruction_cache_.reset(); 489 return NULL; 490 } 491 } 492 } 493 494 return byte_instruction_cache_[byte]; 495 } 496 497 // Chosen empirically to give the best reduction in payload size for 498 // an update from daisy_3701.98.0 to daisy_4206.0.0. 499 const int AssemblyProgram::kLabelLowerLimit = 5; 500 501 CheckBool AssemblyProgram::TrimLabels() { 502 // For now only trim for ARM binaries 503 if (kind() != EXE_ELF_32_ARM) 504 return true; 505 506 int lower_limit = kLabelLowerLimit; 507 508 VLOG(1) << "TrimLabels: threshold " << lower_limit; 509 510 // Remove underused labels from the list of labels 511 RVAToLabel::iterator it = rel32_labels_.begin(); 512 while (it != rel32_labels_.end()) { 513 if (it->second->count_ <= lower_limit) { 514 rel32_labels_.erase(it++); 515 } else { 516 ++it; 517 } 518 } 519 520 // Walk through the list of instructions, replacing trimmed labels 521 // with the original machine instruction 522 for (size_t i = 0; i < instructions_.size(); ++i) { 523 Instruction* instruction = instructions_[i]; 524 switch (instruction->op()) { 525 case REL32ARM: { 526 Label* label = 527 static_cast<InstructionWithLabelARM*>(instruction)->label(); 528 if (label->count_ <= lower_limit) { 529 const uint8* arm_op = 530 static_cast<InstructionWithLabelARM*>(instruction)->arm_op(); 531 uint16 op_size = 532 static_cast<InstructionWithLabelARM*>(instruction)->op_size(); 533 534 if (op_size < 1) 535 return false; 536 BytesInstruction* new_instruction = 537 new(std::nothrow) BytesInstruction(arm_op, op_size); 538 instructions_[i] = new_instruction; 539 } 540 break; 541 } 542 default: 543 break; 544 } 545 } 546 547 return true; 548 } 549 550 void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) { 551 for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end(); 552 ++p) { 553 Label* current = p->second; 554 if (current->index_ != Label::kNoIndex) 555 printf("%d\n", current->count_); 556 } 557 } 558 559 void AssemblyProgram::CountRel32ARM() { 560 PrintLabelCounts(&rel32_labels_); 561 } 562 563 //////////////////////////////////////////////////////////////////////////////// 564 565 Status TrimLabels(AssemblyProgram* program) { 566 if (program->TrimLabels()) 567 return C_OK; 568 else 569 return C_TRIM_FAILED; 570 } 571 572 Status Encode(AssemblyProgram* program, EncodedProgram** output) { 573 *output = NULL; 574 EncodedProgram *encoded = program->Encode(); 575 if (encoded) { 576 *output = encoded; 577 return C_OK; 578 } else { 579 return C_GENERAL_ERROR; 580 } 581 } 582 583 } // namespace courgette 584