Home | History | Annotate | Download | only in common
      1 // -*- mode: C++ -*-
      2 
      3 // Copyright (c) 2010, Google Inc.
      4 // All rights reserved.
      5 //
      6 // Redistribution and use in source and binary forms, with or without
      7 // modification, are permitted provided that the following conditions are
      8 // met:
      9 //
     10 //     * Redistributions of source code must retain the above copyright
     11 // notice, this list of conditions and the following disclaimer.
     12 //     * Redistributions in binary form must reproduce the above
     13 // copyright notice, this list of conditions and the following disclaimer
     14 // in the documentation and/or other materials provided with the
     15 // distribution.
     16 //     * Neither the name of Google Inc. nor the names of its
     17 // contributors may be used to endorse or promote products derived from
     18 // this software without specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 
     32 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     33 
     34 // test-assembler.h: interface to class for building complex binary streams.
     35 
     36 // To test the Breakpad symbol dumper and processor thoroughly, for
     37 // all combinations of host system and minidump processor
     38 // architecture, we need to be able to easily generate complex test
     39 // data like debugging information and minidump files.
     40 //
     41 // For example, if we want our unit tests to provide full code
     42 // coverage for stack walking, it may be difficult to persuade the
     43 // compiler to generate every possible sort of stack walking
     44 // information that we want to support; there are probably DWARF CFI
     45 // opcodes that GCC never emits. Similarly, if we want to test our
     46 // error handling, we will need to generate damaged minidumps or
     47 // debugging information that (we hope) the client or compiler will
     48 // never produce on its own.
     49 //
     50 // google_breakpad::TestAssembler provides a predictable and
     51 // (relatively) simple way to generate complex formatted data streams
     52 // like minidumps and CFI. Furthermore, because TestAssembler is
     53 // portable, developers without access to (say) Visual Studio or a
     54 // SPARC assembler can still work on test data for those targets.
     55 
     56 #ifndef PROCESSOR_TEST_ASSEMBLER_H_
     57 #define PROCESSOR_TEST_ASSEMBLER_H_
     58 
     59 #include <list>
     60 #include <vector>
     61 #include <string>
     62 
     63 #include "common/using_std_string.h"
     64 #include "google_breakpad/common/breakpad_types.h"
     65 
     66 namespace google_breakpad {
     67 
     68 using std::list;
     69 using std::vector;
     70 
     71 namespace test_assembler {
     72 
     73 // A Label represents a value not yet known that we need to store in a
     74 // section. As long as all the labels a section refers to are defined
     75 // by the time we retrieve its contents as bytes, we can use undefined
     76 // labels freely in that section's construction.
     77 //
     78 // A label can be in one of three states:
     79 // - undefined,
     80 // - defined as the sum of some other label and a constant, or
     81 // - a constant.
     82 //
     83 // A label's value never changes, but it can accumulate constraints.
     84 // Adding labels and integers is permitted, and yields a label.
     85 // Subtracting a constant from a label is permitted, and also yields a
     86 // label. Subtracting two labels that have some relationship to each
     87 // other is permitted, and yields a constant.
     88 //
     89 // For example:
     90 //
     91 //   Label a;               // a's value is undefined
     92 //   Label b;               // b's value is undefined
     93 //   {
     94 //     Label c = a + 4;     // okay, even though a's value is unknown
     95 //     b = c + 4;           // also okay; b is now a+8
     96 //   }
     97 //   Label d = b - 2;       // okay; d == a+6, even though c is gone
     98 //   d.Value();             // error: d's value is not yet known
     99 //   d - a;                 // is 6, even though their values are not known
    100 //   a = 12;                // now b == 20, and d == 18
    101 //   d.Value();             // 18: no longer an error
    102 //   b.Value();             // 20
    103 //   d = 10;                // error: d is already defined.
    104 //
    105 // Label objects' lifetimes are unconstrained: notice that, in the
    106 // above example, even though a and b are only related through c, and
    107 // c goes out of scope, the assignment to a sets b's value as well. In
    108 // particular, it's not necessary to ensure that a Label lives beyond
    109 // Sections that refer to it.
    110 class Label {
    111  public:
    112   Label();                      // An undefined label.
    113   Label(uint64_t value);       // A label with a fixed value
    114   Label(const Label &value);    // A label equal to another.
    115   ~Label();
    116 
    117   // Return this label's value; it must be known.
    118   //
    119   // Providing this as a cast operator is nifty, but the conversions
    120   // happen in unexpected places. In particular, ISO C++ says that
    121   // Label + size_t becomes ambigious, because it can't decide whether
    122   // to convert the Label to a uint64_t and then to a size_t, or use
    123   // the overloaded operator that returns a new label, even though the
    124   // former could fail if the label is not yet defined and the latter won't.
    125   uint64_t Value() const;
    126 
    127   Label &operator=(uint64_t value);
    128   Label &operator=(const Label &value);
    129   Label operator+(uint64_t addend) const;
    130   Label operator-(uint64_t subtrahend) const;
    131   uint64_t operator-(const Label &subtrahend) const;
    132 
    133   // We could also provide == and != that work on undefined, but
    134   // related, labels.
    135 
    136   // Return true if this label's value is known. If VALUE_P is given,
    137   // set *VALUE_P to the known value if returning true.
    138   bool IsKnownConstant(uint64_t *value_p = NULL) const;
    139 
    140   // Return true if the offset from LABEL to this label is known. If
    141   // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
    142   //
    143   // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m',
    144   // except that it also returns a value indicating whether the
    145   // subtraction is possible given what we currently know of l and m.
    146   // It can be possible even if we don't know l and m's values. For
    147   // example:
    148   //
    149   //   Label l, m;
    150   //   m = l + 10;
    151   //   l.IsKnownConstant();             // false
    152   //   m.IsKnownConstant();             // false
    153   //   uint64_t d;
    154   //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10.
    155   //   l-m                              // -10
    156   //   m-l                              // 10
    157   //   m.Value()                        // error: m's value is not known
    158   bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const;
    159 
    160  private:
    161   // A label's value, or if that is not yet known, how the value is
    162   // related to other labels' values. A binding may be:
    163   // - a known constant,
    164   // - constrained to be equal to some other binding plus a constant, or
    165   // - unconstrained, and free to take on any value.
    166   //
    167   // Many labels may point to a single binding, and each binding may
    168   // refer to another, so bindings and labels form trees whose leaves
    169   // are labels, whose interior nodes (and roots) are bindings, and
    170   // where links point from children to parents. Bindings are
    171   // reference counted, allowing labels to be lightweight, copyable,
    172   // assignable, placed in containers, and so on.
    173   class Binding {
    174    public:
    175     Binding();
    176     Binding(uint64_t addend);
    177     ~Binding();
    178 
    179     // Increment our reference count.
    180     void Acquire() { reference_count_++; };
    181     // Decrement our reference count, and return true if it is zero.
    182     bool Release() { return --reference_count_ == 0; }
    183 
    184     // Set this binding to be equal to BINDING + ADDEND. If BINDING is
    185     // NULL, then set this binding to the known constant ADDEND.
    186     // Update every binding on this binding's chain to point directly
    187     // to BINDING, or to be a constant, with addends adjusted
    188     // appropriately.
    189     void Set(Binding *binding, uint64_t value);
    190 
    191     // Return what we know about the value of this binding.
    192     // - If this binding's value is a known constant, set BASE to
    193     //   NULL, and set ADDEND to its value.
    194     // - If this binding is not a known constant but related to other
    195     //   bindings, set BASE to the binding at the end of the relation
    196     //   chain (which will always be unconstrained), and set ADDEND to the
    197     //   value to add to that binding's value to get this binding's
    198     //   value.
    199     // - If this binding is unconstrained, set BASE to this, and leave
    200     //   ADDEND unchanged.
    201     void Get(Binding **base, uint64_t *addend);
    202 
    203    private:
    204     // There are three cases:
    205     //
    206     // - A binding representing a known constant value has base_ NULL,
    207     //   and addend_ equal to the value.
    208     //
    209     // - A binding representing a completely unconstrained value has
    210     //   base_ pointing to this; addend_ is unused.
    211     //
    212     // - A binding whose value is related to some other binding's
    213     //   value has base_ pointing to that other binding, and addend_
    214     //   set to the amount to add to that binding's value to get this
    215     //   binding's value. We only represent relationships of the form
    216     //   x = y+c.
    217     //
    218     // Thus, the bind_ links form a chain terminating in either a
    219     // known constant value or a completely unconstrained value. Most
    220     // operations on bindings do path compression: they change every
    221     // binding on the chain to point directly to the final value,
    222     // adjusting addends as appropriate.
    223     Binding *base_;
    224     uint64_t addend_;
    225 
    226     // The number of Labels and Bindings pointing to this binding.
    227     // (When a binding points to itself, indicating a completely
    228     // unconstrained binding, that doesn't count as a reference.)
    229     int reference_count_;
    230   };
    231 
    232   // This label's value.
    233   Binding *value_;
    234 };
    235 
    236 inline Label operator+(uint64_t a, const Label &l) { return l + a; }
    237 // Note that int-Label isn't defined, as negating a Label is not an
    238 // operation we support.
    239 
    240 // Conventions for representing larger numbers as sequences of bytes.
    241 enum Endianness {
    242   kBigEndian,        // Big-endian: the most significant byte comes first.
    243   kLittleEndian,     // Little-endian: the least significant byte comes first.
    244   kUnsetEndian,      // used internally
    245 };
    246 
    247 // A section is a sequence of bytes, constructed by appending bytes
    248 // to the end. Sections have a convenient and flexible set of member
    249 // functions for appending data in various formats: big-endian and
    250 // little-endian signed and unsigned values of different sizes;
    251 // LEB128 and ULEB128 values (see below), and raw blocks of bytes.
    252 //
    253 // If you need to append a value to a section that is not convenient
    254 // to compute immediately, you can create a label, append the
    255 // label's value to the section, and then set the label's value
    256 // later, when it's convenient to do so. Once a label's value is
    257 // known, the section class takes care of updating all previously
    258 // appended references to it.
    259 //
    260 // Once all the labels to which a section refers have had their
    261 // values determined, you can get a copy of the section's contents
    262 // as a string.
    263 //
    264 // Note that there is no specified "start of section" label. This is
    265 // because there are typically several different meanings for "the
    266 // start of a section": the offset of the section within an object
    267 // file, the address in memory at which the section's content appear,
    268 // and so on. It's up to the code that uses the Section class to
    269 // keep track of these explicitly, as they depend on the application.
    270 class Section {
    271  public:
    272   Section(Endianness endianness = kUnsetEndian)
    273       : endianness_(endianness) { };
    274 
    275   // A base class destructor should be either public and virtual,
    276   // or protected and nonvirtual.
    277   virtual ~Section() { };
    278 
    279   // Set the default endianness of this section to ENDIANNESS. This
    280   // sets the behavior of the D<N> appending functions. If the
    281   // assembler's default endianness was set, this is the
    282   void set_endianness(Endianness endianness) {
    283     endianness_ = endianness;
    284   }
    285 
    286   // Return the default endianness of this section.
    287   Endianness endianness() const { return endianness_; }
    288 
    289   // Append the SIZE bytes at DATA or the contents of STRING to the
    290   // end of this section. Return a reference to this section.
    291   Section &Append(const uint8_t *data, size_t size) {
    292     contents_.append(reinterpret_cast<const char *>(data), size);
    293     return *this;
    294   };
    295   Section &Append(const string &data) {
    296     contents_.append(data);
    297     return *this;
    298   };
    299 
    300   // Append SIZE copies of BYTE to the end of this section. Return a
    301   // reference to this section.
    302   Section &Append(size_t size, uint8_t byte) {
    303     contents_.append(size, (char) byte);
    304     return *this;
    305   }
    306 
    307   // Append NUMBER to this section. ENDIANNESS is the endianness to
    308   // use to write the number. SIZE is the length of the number in
    309   // bytes. Return a reference to this section.
    310   Section &Append(Endianness endianness, size_t size, uint64_t number);
    311   Section &Append(Endianness endianness, size_t size, const Label &label);
    312 
    313   // Append SECTION to the end of this section. The labels SECTION
    314   // refers to need not be defined yet.
    315   //
    316   // Note that this has no effect on any Labels' values, or on
    317   // SECTION. If placing SECTION within 'this' provides new
    318   // constraints on existing labels' values, then it's up to the
    319   // caller to fiddle with those labels as needed.
    320   Section &Append(const Section &section);
    321 
    322   // Append the contents of DATA as a series of bytes terminated by
    323   // a NULL character.
    324   Section &AppendCString(const string &data) {
    325     Append(data);
    326     contents_ += '\0';
    327     return *this;
    328   }
    329 
    330   // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes
    331   // long, pad with '\0' characters.
    332   Section &AppendCString(const string &data, size_t size) {
    333     contents_.append(data, 0, size);
    334     if (data.size() < size)
    335       Append(size - data.size(), 0);
    336     return *this;
    337   }
    338 
    339   // Append VALUE or LABEL to this section, with the given bit width and
    340   // endianness. Return a reference to this section.
    341   //
    342   // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
    343   // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
    344   //                        'B' (big-endian, most significant byte first), or
    345   //                        'D' (default, the section's default endianness)
    346   // <BITWIDTH> is 8, 16, 32, or 64.
    347   //
    348   // Since endianness doesn't matter for a single byte, all the
    349   // <BITWIDTH>=8 functions are equivalent.
    350   //
    351   // These can be used to write both signed and unsigned values, as
    352   // the compiler will properly sign-extend a signed value before
    353   // passing it to the function, at which point the function's
    354   // behavior is the same either way.
    355   Section &L8(uint8_t value) { contents_ += value; return *this; }
    356   Section &B8(uint8_t value) { contents_ += value; return *this; }
    357   Section &D8(uint8_t value) { contents_ += value; return *this; }
    358   Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t),
    359           &B16(uint16_t), &B32(uint32_t), &B64(uint64_t),
    360           &D16(uint16_t), &D32(uint32_t), &D64(uint64_t);
    361   Section &L8(const Label &label),  &L16(const Label &label),
    362           &L32(const Label &label), &L64(const Label &label),
    363           &B8(const Label &label),  &B16(const Label &label),
    364           &B32(const Label &label), &B64(const Label &label),
    365           &D8(const Label &label),  &D16(const Label &label),
    366           &D32(const Label &label), &D64(const Label &label);
    367 
    368   // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
    369   //
    370   // The signed LEB128 representation of an integer N is a variable
    371   // number of bytes:
    372   //
    373   // - If N is between -0x40 and 0x3f, then its signed LEB128
    374   //   representation is a single byte whose value is N.
    375   //
    376   // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
    377   //   0x80, followed by the signed LEB128 representation of N / 128,
    378   //   rounded towards negative infinity.
    379   //
    380   // In other words, we break VALUE into groups of seven bits, put
    381   // them in little-endian order, and then write them as eight-bit
    382   // bytes with the high bit on all but the last.
    383   //
    384   // Note that VALUE cannot be a Label (we would have to implement
    385   // relaxation).
    386   Section &LEB128(long long value);
    387 
    388   // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
    389   //
    390   // The unsigned LEB128 representation of an integer N is a variable
    391   // number of bytes:
    392   //
    393   // - If N is between 0 and 0x7f, then its unsigned LEB128
    394   //   representation is a single byte whose value is N.
    395   //
    396   // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
    397   //   0x80, followed by the unsigned LEB128 representation of N /
    398   //   128, rounded towards negative infinity.
    399   //
    400   // Note that VALUE cannot be a Label (we would have to implement
    401   // relaxation).
    402   Section &ULEB128(uint64_t value);
    403 
    404   // Jump to the next location aligned on an ALIGNMENT-byte boundary,
    405   // relative to the start of the section. Fill the gap with PAD_BYTE.
    406   // ALIGNMENT must be a power of two. Return a reference to this
    407   // section.
    408   Section &Align(size_t alignment, uint8_t pad_byte = 0);
    409 
    410   // Clear the contents of this section.
    411   void Clear();
    412 
    413   // Return the current size of the section.
    414   size_t Size() const { return contents_.size(); }
    415 
    416   // Return a label representing the start of the section.
    417   //
    418   // It is up to the user whether this label represents the section's
    419   // position in an object file, the section's address in memory, or
    420   // what have you; some applications may need both, in which case
    421   // this simple-minded interface won't be enough. This class only
    422   // provides a single start label, for use with the Here and Mark
    423   // member functions.
    424   //
    425   // Ideally, we'd provide this in a subclass that actually knows more
    426   // about the application at hand and can provide an appropriate
    427   // collection of start labels. But then the appending member
    428   // functions like Append and D32 would return a reference to the
    429   // base class, not the derived class, and the chaining won't work.
    430   // Since the only value here is in pretty notation, that's a fatal
    431   // flaw.
    432   Label start() const { return start_; }
    433 
    434   // Return a label representing the point at which the next Appended
    435   // item will appear in the section, relative to start().
    436   Label Here() const { return start_ + Size(); }
    437 
    438   // Set *LABEL to Here, and return a reference to this section.
    439   Section &Mark(Label *label) { *label = Here(); return *this; }
    440 
    441   // If there are no undefined label references left in this
    442   // section, set CONTENTS to the contents of this section, as a
    443   // string, and clear this section. Return true on success, or false
    444   // if there were still undefined labels.
    445   bool GetContents(string *contents);
    446 
    447  private:
    448   // Used internally. A reference to a label's value.
    449   struct Reference {
    450     Reference(size_t set_offset, Endianness set_endianness,  size_t set_size,
    451               const Label &set_label)
    452         : offset(set_offset), endianness(set_endianness), size(set_size),
    453           label(set_label) { }
    454 
    455     // The offset of the reference within the section.
    456     size_t offset;
    457 
    458     // The endianness of the reference.
    459     Endianness endianness;
    460 
    461     // The size of the reference.
    462     size_t size;
    463 
    464     // The label to which this is a reference.
    465     Label label;
    466   };
    467 
    468   // The default endianness of this section.
    469   Endianness endianness_;
    470 
    471   // The contents of the section.
    472   string contents_;
    473 
    474   // References to labels within those contents.
    475   vector<Reference> references_;
    476 
    477   // A label referring to the beginning of the section.
    478   Label start_;
    479 };
    480 
    481 }  // namespace test_assembler
    482 }  // namespace google_breakpad
    483 
    484 #endif  // PROCESSOR_TEST_ASSEMBLER_H_
    485