Home | History | Annotate | Download | only in ADT
      1 //===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_ADT_TWINE_H
     11 #define LLVM_ADT_TWINE_H
     12 
     13 #include "llvm/ADT/SmallVector.h"
     14 #include "llvm/ADT/StringRef.h"
     15 #include "llvm/Support/ErrorHandling.h"
     16 #include <cassert>
     17 #include <cstdint>
     18 #include <string>
     19 
     20 namespace llvm {
     21 
     22   class formatv_object_base;
     23   class raw_ostream;
     24 
     25   /// Twine - A lightweight data structure for efficiently representing the
     26   /// concatenation of temporary values as strings.
     27   ///
     28   /// A Twine is a kind of rope, it represents a concatenated string using a
     29   /// binary-tree, where the string is the preorder of the nodes. Since the
     30   /// Twine can be efficiently rendered into a buffer when its result is used,
     31   /// it avoids the cost of generating temporary values for intermediate string
     32   /// results -- particularly in cases when the Twine result is never
     33   /// required. By explicitly tracking the type of leaf nodes, we can also avoid
     34   /// the creation of temporary strings for conversions operations (such as
     35   /// appending an integer to a string).
     36   ///
     37   /// A Twine is not intended for use directly and should not be stored, its
     38   /// implementation relies on the ability to store pointers to temporary stack
     39   /// objects which may be deallocated at the end of a statement. Twines should
     40   /// only be used accepted as const references in arguments, when an API wishes
     41   /// to accept possibly-concatenated strings.
     42   ///
     43   /// Twines support a special 'null' value, which always concatenates to form
     44   /// itself, and renders as an empty string. This can be returned from APIs to
     45   /// effectively nullify any concatenations performed on the result.
     46   ///
     47   /// \b Implementation
     48   ///
     49   /// Given the nature of a Twine, it is not possible for the Twine's
     50   /// concatenation method to construct interior nodes; the result must be
     51   /// represented inside the returned value. For this reason a Twine object
     52   /// actually holds two values, the left- and right-hand sides of a
     53   /// concatenation. We also have nullary Twine objects, which are effectively
     54   /// sentinel values that represent empty strings.
     55   ///
     56   /// Thus, a Twine can effectively have zero, one, or two children. The \see
     57   /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
     58   /// testing the number of children.
     59   ///
     60   /// We maintain a number of invariants on Twine objects (FIXME: Why):
     61   ///  - Nullary twines are always represented with their Kind on the left-hand
     62   ///    side, and the Empty kind on the right-hand side.
     63   ///  - Unary twines are always represented with the value on the left-hand
     64   ///    side, and the Empty kind on the right-hand side.
     65   ///  - If a Twine has another Twine as a child, that child should always be
     66   ///    binary (otherwise it could have been folded into the parent).
     67   ///
     68   /// These invariants are check by \see isValid().
     69   ///
     70   /// \b Efficiency Considerations
     71   ///
     72   /// The Twine is designed to yield efficient and small code for common
     73   /// situations. For this reason, the concat() method is inlined so that
     74   /// concatenations of leaf nodes can be optimized into stores directly into a
     75   /// single stack allocated object.
     76   ///
     77   /// In practice, not all compilers can be trusted to optimize concat() fully,
     78   /// so we provide two additional methods (and accompanying operator+
     79   /// overloads) to guarantee that particularly important cases (cstring plus
     80   /// StringRef) codegen as desired.
     81   class Twine {
     82     /// NodeKind - Represent the type of an argument.
     83     enum NodeKind : unsigned char {
     84       /// An empty string; the result of concatenating anything with it is also
     85       /// empty.
     86       NullKind,
     87 
     88       /// The empty string.
     89       EmptyKind,
     90 
     91       /// A pointer to a Twine instance.
     92       TwineKind,
     93 
     94       /// A pointer to a C string instance.
     95       CStringKind,
     96 
     97       /// A pointer to an std::string instance.
     98       StdStringKind,
     99 
    100       /// A pointer to a StringRef instance.
    101       StringRefKind,
    102 
    103       /// A pointer to a SmallString instance.
    104       SmallStringKind,
    105 
    106       /// A pointer to a formatv_object_base instance.
    107       FormatvObjectKind,
    108 
    109       /// A char value, to render as a character.
    110       CharKind,
    111 
    112       /// An unsigned int value, to render as an unsigned decimal integer.
    113       DecUIKind,
    114 
    115       /// An int value, to render as a signed decimal integer.
    116       DecIKind,
    117 
    118       /// A pointer to an unsigned long value, to render as an unsigned decimal
    119       /// integer.
    120       DecULKind,
    121 
    122       /// A pointer to a long value, to render as a signed decimal integer.
    123       DecLKind,
    124 
    125       /// A pointer to an unsigned long long value, to render as an unsigned
    126       /// decimal integer.
    127       DecULLKind,
    128 
    129       /// A pointer to a long long value, to render as a signed decimal integer.
    130       DecLLKind,
    131 
    132       /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
    133       /// integer.
    134       UHexKind
    135     };
    136 
    137     union Child
    138     {
    139       const Twine *twine;
    140       const char *cString;
    141       const std::string *stdString;
    142       const StringRef *stringRef;
    143       const SmallVectorImpl<char> *smallString;
    144       const formatv_object_base *formatvObject;
    145       char character;
    146       unsigned int decUI;
    147       int decI;
    148       const unsigned long *decUL;
    149       const long *decL;
    150       const unsigned long long *decULL;
    151       const long long *decLL;
    152       const uint64_t *uHex;
    153     };
    154 
    155     /// LHS - The prefix in the concatenation, which may be uninitialized for
    156     /// Null or Empty kinds.
    157     Child LHS;
    158     /// RHS - The suffix in the concatenation, which may be uninitialized for
    159     /// Null or Empty kinds.
    160     Child RHS;
    161     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
    162     NodeKind LHSKind;
    163     /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
    164     NodeKind RHSKind;
    165 
    166     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
    167     explicit Twine(NodeKind Kind)
    168       : LHSKind(Kind), RHSKind(EmptyKind) {
    169       assert(isNullary() && "Invalid kind!");
    170     }
    171 
    172     /// Construct a binary twine.
    173     explicit Twine(const Twine &LHS, const Twine &RHS)
    174         : LHSKind(TwineKind), RHSKind(TwineKind) {
    175       this->LHS.twine = &LHS;
    176       this->RHS.twine = &RHS;
    177       assert(isValid() && "Invalid twine!");
    178     }
    179 
    180     /// Construct a twine from explicit values.
    181     explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
    182         : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
    183       assert(isValid() && "Invalid twine!");
    184     }
    185 
    186     /// Check for the null twine.
    187     bool isNull() const {
    188       return getLHSKind() == NullKind;
    189     }
    190 
    191     /// Check for the empty twine.
    192     bool isEmpty() const {
    193       return getLHSKind() == EmptyKind;
    194     }
    195 
    196     /// Check if this is a nullary twine (null or empty).
    197     bool isNullary() const {
    198       return isNull() || isEmpty();
    199     }
    200 
    201     /// Check if this is a unary twine.
    202     bool isUnary() const {
    203       return getRHSKind() == EmptyKind && !isNullary();
    204     }
    205 
    206     /// Check if this is a binary twine.
    207     bool isBinary() const {
    208       return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
    209     }
    210 
    211     /// Check if this is a valid twine (satisfying the invariants on
    212     /// order and number of arguments).
    213     bool isValid() const {
    214       // Nullary twines always have Empty on the RHS.
    215       if (isNullary() && getRHSKind() != EmptyKind)
    216         return false;
    217 
    218       // Null should never appear on the RHS.
    219       if (getRHSKind() == NullKind)
    220         return false;
    221 
    222       // The RHS cannot be non-empty if the LHS is empty.
    223       if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
    224         return false;
    225 
    226       // A twine child should always be binary.
    227       if (getLHSKind() == TwineKind &&
    228           !LHS.twine->isBinary())
    229         return false;
    230       if (getRHSKind() == TwineKind &&
    231           !RHS.twine->isBinary())
    232         return false;
    233 
    234       return true;
    235     }
    236 
    237     /// Get the NodeKind of the left-hand side.
    238     NodeKind getLHSKind() const { return LHSKind; }
    239 
    240     /// Get the NodeKind of the right-hand side.
    241     NodeKind getRHSKind() const { return RHSKind; }
    242 
    243     /// Print one child from a twine.
    244     void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
    245 
    246     /// Print the representation of one child from a twine.
    247     void printOneChildRepr(raw_ostream &OS, Child Ptr,
    248                            NodeKind Kind) const;
    249 
    250   public:
    251     /// @name Constructors
    252     /// @{
    253 
    254     /// Construct from an empty string.
    255     /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) {
    256       assert(isValid() && "Invalid twine!");
    257     }
    258 
    259     Twine(const Twine &) = default;
    260 
    261     /// Construct from a C string.
    262     ///
    263     /// We take care here to optimize "" into the empty twine -- this will be
    264     /// optimized out for string constants. This allows Twine arguments have
    265     /// default "" values, without introducing unnecessary string constants.
    266     /*implicit*/ Twine(const char *Str)
    267       : RHSKind(EmptyKind) {
    268       if (Str[0] != '\0') {
    269         LHS.cString = Str;
    270         LHSKind = CStringKind;
    271       } else
    272         LHSKind = EmptyKind;
    273 
    274       assert(isValid() && "Invalid twine!");
    275     }
    276 
    277     /// Construct from an std::string.
    278     /*implicit*/ Twine(const std::string &Str)
    279       : LHSKind(StdStringKind), RHSKind(EmptyKind) {
    280       LHS.stdString = &Str;
    281       assert(isValid() && "Invalid twine!");
    282     }
    283 
    284     /// Construct from a StringRef.
    285     /*implicit*/ Twine(const StringRef &Str)
    286       : LHSKind(StringRefKind), RHSKind(EmptyKind) {
    287       LHS.stringRef = &Str;
    288       assert(isValid() && "Invalid twine!");
    289     }
    290 
    291     /// Construct from a SmallString.
    292     /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
    293       : LHSKind(SmallStringKind), RHSKind(EmptyKind) {
    294       LHS.smallString = &Str;
    295       assert(isValid() && "Invalid twine!");
    296     }
    297 
    298     /// Construct from a formatv_object_base.
    299     /*implicit*/ Twine(const formatv_object_base &Fmt)
    300         : LHSKind(FormatvObjectKind), RHSKind(EmptyKind) {
    301       LHS.formatvObject = &Fmt;
    302       assert(isValid() && "Invalid twine!");
    303     }
    304 
    305     /// Construct from a char.
    306     explicit Twine(char Val)
    307       : LHSKind(CharKind), RHSKind(EmptyKind) {
    308       LHS.character = Val;
    309     }
    310 
    311     /// Construct from a signed char.
    312     explicit Twine(signed char Val)
    313       : LHSKind(CharKind), RHSKind(EmptyKind) {
    314       LHS.character = static_cast<char>(Val);
    315     }
    316 
    317     /// Construct from an unsigned char.
    318     explicit Twine(unsigned char Val)
    319       : LHSKind(CharKind), RHSKind(EmptyKind) {
    320       LHS.character = static_cast<char>(Val);
    321     }
    322 
    323     /// Construct a twine to print \p Val as an unsigned decimal integer.
    324     explicit Twine(unsigned Val)
    325       : LHSKind(DecUIKind), RHSKind(EmptyKind) {
    326       LHS.decUI = Val;
    327     }
    328 
    329     /// Construct a twine to print \p Val as a signed decimal integer.
    330     explicit Twine(int Val)
    331       : LHSKind(DecIKind), RHSKind(EmptyKind) {
    332       LHS.decI = Val;
    333     }
    334 
    335     /// Construct a twine to print \p Val as an unsigned decimal integer.
    336     explicit Twine(const unsigned long &Val)
    337       : LHSKind(DecULKind), RHSKind(EmptyKind) {
    338       LHS.decUL = &Val;
    339     }
    340 
    341     /// Construct a twine to print \p Val as a signed decimal integer.
    342     explicit Twine(const long &Val)
    343       : LHSKind(DecLKind), RHSKind(EmptyKind) {
    344       LHS.decL = &Val;
    345     }
    346 
    347     /// Construct a twine to print \p Val as an unsigned decimal integer.
    348     explicit Twine(const unsigned long long &Val)
    349       : LHSKind(DecULLKind), RHSKind(EmptyKind) {
    350       LHS.decULL = &Val;
    351     }
    352 
    353     /// Construct a twine to print \p Val as a signed decimal integer.
    354     explicit Twine(const long long &Val)
    355       : LHSKind(DecLLKind), RHSKind(EmptyKind) {
    356       LHS.decLL = &Val;
    357     }
    358 
    359     // FIXME: Unfortunately, to make sure this is as efficient as possible we
    360     // need extra binary constructors from particular types. We can't rely on
    361     // the compiler to be smart enough to fold operator+()/concat() down to the
    362     // right thing. Yet.
    363 
    364     /// Construct as the concatenation of a C string and a StringRef.
    365     /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
    366         : LHSKind(CStringKind), RHSKind(StringRefKind) {
    367       this->LHS.cString = LHS;
    368       this->RHS.stringRef = &RHS;
    369       assert(isValid() && "Invalid twine!");
    370     }
    371 
    372     /// Construct as the concatenation of a StringRef and a C string.
    373     /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
    374         : LHSKind(StringRefKind), RHSKind(CStringKind) {
    375       this->LHS.stringRef = &LHS;
    376       this->RHS.cString = RHS;
    377       assert(isValid() && "Invalid twine!");
    378     }
    379 
    380     /// Since the intended use of twines is as temporary objects, assignments
    381     /// when concatenating might cause undefined behavior or stack corruptions
    382     Twine &operator=(const Twine &) = delete;
    383 
    384     /// Create a 'null' string, which is an empty string that always
    385     /// concatenates to form another empty string.
    386     static Twine createNull() {
    387       return Twine(NullKind);
    388     }
    389 
    390     /// @}
    391     /// @name Numeric Conversions
    392     /// @{
    393 
    394     // Construct a twine to print \p Val as an unsigned hexadecimal integer.
    395     static Twine utohexstr(const uint64_t &Val) {
    396       Child LHS, RHS;
    397       LHS.uHex = &Val;
    398       RHS.twine = nullptr;
    399       return Twine(LHS, UHexKind, RHS, EmptyKind);
    400     }
    401 
    402     /// @}
    403     /// @name Predicate Operations
    404     /// @{
    405 
    406     /// Check if this twine is trivially empty; a false return value does not
    407     /// necessarily mean the twine is empty.
    408     bool isTriviallyEmpty() const {
    409       return isNullary();
    410     }
    411 
    412     /// Return true if this twine can be dynamically accessed as a single
    413     /// StringRef value with getSingleStringRef().
    414     bool isSingleStringRef() const {
    415       if (getRHSKind() != EmptyKind) return false;
    416 
    417       switch (getLHSKind()) {
    418       case EmptyKind:
    419       case CStringKind:
    420       case StdStringKind:
    421       case StringRefKind:
    422       case SmallStringKind:
    423         return true;
    424       default:
    425         return false;
    426       }
    427     }
    428 
    429     /// @}
    430     /// @name String Operations
    431     /// @{
    432 
    433     Twine concat(const Twine &Suffix) const;
    434 
    435     /// @}
    436     /// @name Output & Conversion.
    437     /// @{
    438 
    439     /// Return the twine contents as a std::string.
    440     std::string str() const;
    441 
    442     /// Append the concatenated string into the given SmallString or SmallVector.
    443     void toVector(SmallVectorImpl<char> &Out) const;
    444 
    445     /// This returns the twine as a single StringRef.  This method is only valid
    446     /// if isSingleStringRef() is true.
    447     StringRef getSingleStringRef() const {
    448       assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
    449       switch (getLHSKind()) {
    450       default: llvm_unreachable("Out of sync with isSingleStringRef");
    451       case EmptyKind:      return StringRef();
    452       case CStringKind:    return StringRef(LHS.cString);
    453       case StdStringKind:  return StringRef(*LHS.stdString);
    454       case StringRefKind:  return *LHS.stringRef;
    455       case SmallStringKind:
    456         return StringRef(LHS.smallString->data(), LHS.smallString->size());
    457       }
    458     }
    459 
    460     /// This returns the twine as a single StringRef if it can be
    461     /// represented as such. Otherwise the twine is written into the given
    462     /// SmallVector and a StringRef to the SmallVector's data is returned.
    463     StringRef toStringRef(SmallVectorImpl<char> &Out) const {
    464       if (isSingleStringRef())
    465         return getSingleStringRef();
    466       toVector(Out);
    467       return StringRef(Out.data(), Out.size());
    468     }
    469 
    470     /// This returns the twine as a single null terminated StringRef if it
    471     /// can be represented as such. Otherwise the twine is written into the
    472     /// given SmallVector and a StringRef to the SmallVector's data is returned.
    473     ///
    474     /// The returned StringRef's size does not include the null terminator.
    475     StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
    476 
    477     /// Write the concatenated string represented by this twine to the
    478     /// stream \p OS.
    479     void print(raw_ostream &OS) const;
    480 
    481     /// Dump the concatenated string represented by this twine to stderr.
    482     void dump() const;
    483 
    484     /// Write the representation of this twine to the stream \p OS.
    485     void printRepr(raw_ostream &OS) const;
    486 
    487     /// Dump the representation of this twine to stderr.
    488     void dumpRepr() const;
    489 
    490     /// @}
    491   };
    492 
    493   /// @name Twine Inline Implementations
    494   /// @{
    495 
    496   inline Twine Twine::concat(const Twine &Suffix) const {
    497     // Concatenation with null is null.
    498     if (isNull() || Suffix.isNull())
    499       return Twine(NullKind);
    500 
    501     // Concatenation with empty yields the other side.
    502     if (isEmpty())
    503       return Suffix;
    504     if (Suffix.isEmpty())
    505       return *this;
    506 
    507     // Otherwise we need to create a new node, taking care to fold in unary
    508     // twines.
    509     Child NewLHS, NewRHS;
    510     NewLHS.twine = this;
    511     NewRHS.twine = &Suffix;
    512     NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
    513     if (isUnary()) {
    514       NewLHS = LHS;
    515       NewLHSKind = getLHSKind();
    516     }
    517     if (Suffix.isUnary()) {
    518       NewRHS = Suffix.LHS;
    519       NewRHSKind = Suffix.getLHSKind();
    520     }
    521 
    522     return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
    523   }
    524 
    525   inline Twine operator+(const Twine &LHS, const Twine &RHS) {
    526     return LHS.concat(RHS);
    527   }
    528 
    529   /// Additional overload to guarantee simplified codegen; this is equivalent to
    530   /// concat().
    531 
    532   inline Twine operator+(const char *LHS, const StringRef &RHS) {
    533     return Twine(LHS, RHS);
    534   }
    535 
    536   /// Additional overload to guarantee simplified codegen; this is equivalent to
    537   /// concat().
    538 
    539   inline Twine operator+(const StringRef &LHS, const char *RHS) {
    540     return Twine(LHS, RHS);
    541   }
    542 
    543   inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
    544     RHS.print(OS);
    545     return OS;
    546   }
    547 
    548   /// @}
    549 
    550 } // end namespace llvm
    551 
    552 #endif // LLVM_ADT_TWINE_H
    553