Home | History | Annotate | Download | only in ADT
      1 //===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_ADT_TWINE_H
     11 #define LLVM_ADT_TWINE_H
     12 
     13 #include "llvm/ADT/SmallVector.h"
     14 #include "llvm/ADT/StringRef.h"
     15 #include "llvm/Support/DataTypes.h"
     16 #include "llvm/Support/ErrorHandling.h"
     17 #include <cassert>
     18 #include <string>
     19 
     20 namespace llvm {
     21   class raw_ostream;
     22 
     23   /// Twine - A lightweight data structure for efficiently representing the
     24   /// concatenation of temporary values as strings.
     25   ///
     26   /// A Twine is a kind of rope, it represents a concatenated string using a
     27   /// binary-tree, where the string is the preorder of the nodes. Since the
     28   /// Twine can be efficiently rendered into a buffer when its result is used,
     29   /// it avoids the cost of generating temporary values for intermediate string
     30   /// results -- particularly in cases when the Twine result is never
     31   /// required. By explicitly tracking the type of leaf nodes, we can also avoid
     32   /// the creation of temporary strings for conversions operations (such as
     33   /// appending an integer to a string).
     34   ///
     35   /// A Twine is not intended for use directly and should not be stored, its
     36   /// implementation relies on the ability to store pointers to temporary stack
     37   /// objects which may be deallocated at the end of a statement. Twines should
     38   /// only be used accepted as const references in arguments, when an API wishes
     39   /// to accept possibly-concatenated strings.
     40   ///
     41   /// Twines support a special 'null' value, which always concatenates to form
     42   /// itself, and renders as an empty string. This can be returned from APIs to
     43   /// effectively nullify any concatenations performed on the result.
     44   ///
     45   /// \b Implementation
     46   ///
     47   /// Given the nature of a Twine, it is not possible for the Twine's
     48   /// concatenation method to construct interior nodes; the result must be
     49   /// represented inside the returned value. For this reason a Twine object
     50   /// actually holds two values, the left- and right-hand sides of a
     51   /// concatenation. We also have nullary Twine objects, which are effectively
     52   /// sentinel values that represent empty strings.
     53   ///
     54   /// Thus, a Twine can effectively have zero, one, or two children. The \see
     55   /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
     56   /// testing the number of children.
     57   ///
     58   /// We maintain a number of invariants on Twine objects (FIXME: Why):
     59   ///  - Nullary twines are always represented with their Kind on the left-hand
     60   ///    side, and the Empty kind on the right-hand side.
     61   ///  - Unary twines are always represented with the value on the left-hand
     62   ///    side, and the Empty kind on the right-hand side.
     63   ///  - If a Twine has another Twine as a child, that child should always be
     64   ///    binary (otherwise it could have been folded into the parent).
     65   ///
     66   /// These invariants are check by \see isValid().
     67   ///
     68   /// \b Efficiency Considerations
     69   ///
     70   /// The Twine is designed to yield efficient and small code for common
     71   /// situations. For this reason, the concat() method is inlined so that
     72   /// concatenations of leaf nodes can be optimized into stores directly into a
     73   /// single stack allocated object.
     74   ///
     75   /// In practice, not all compilers can be trusted to optimize concat() fully,
     76   /// so we provide two additional methods (and accompanying operator+
     77   /// overloads) to guarantee that particularly important cases (cstring plus
     78   /// StringRef) codegen as desired.
     79   class Twine {
     80     /// NodeKind - Represent the type of an argument.
     81     enum NodeKind : unsigned char {
     82       /// An empty string; the result of concatenating anything with it is also
     83       /// empty.
     84       NullKind,
     85 
     86       /// The empty string.
     87       EmptyKind,
     88 
     89       /// A pointer to a Twine instance.
     90       TwineKind,
     91 
     92       /// A pointer to a C string instance.
     93       CStringKind,
     94 
     95       /// A pointer to an std::string instance.
     96       StdStringKind,
     97 
     98       /// A pointer to a StringRef instance.
     99       StringRefKind,
    100 
    101       /// A pointer to a SmallString instance.
    102       SmallStringKind,
    103 
    104       /// A char value reinterpreted as a pointer, to render as a character.
    105       CharKind,
    106 
    107       /// An unsigned int value reinterpreted as a pointer, to render as an
    108       /// unsigned decimal integer.
    109       DecUIKind,
    110 
    111       /// An int value reinterpreted as a pointer, to render as a signed
    112       /// decimal integer.
    113       DecIKind,
    114 
    115       /// A pointer to an unsigned long value, to render as an unsigned decimal
    116       /// integer.
    117       DecULKind,
    118 
    119       /// A pointer to a long value, to render as a signed decimal integer.
    120       DecLKind,
    121 
    122       /// A pointer to an unsigned long long value, to render as an unsigned
    123       /// decimal integer.
    124       DecULLKind,
    125 
    126       /// A pointer to a long long value, to render as a signed decimal integer.
    127       DecLLKind,
    128 
    129       /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
    130       /// integer.
    131       UHexKind
    132     };
    133 
    134     union Child
    135     {
    136       const Twine *twine;
    137       const char *cString;
    138       const std::string *stdString;
    139       const StringRef *stringRef;
    140       const SmallVectorImpl<char> *smallString;
    141       char character;
    142       unsigned int decUI;
    143       int decI;
    144       const unsigned long *decUL;
    145       const long *decL;
    146       const unsigned long long *decULL;
    147       const long long *decLL;
    148       const uint64_t *uHex;
    149     };
    150 
    151   private:
    152     /// LHS - The prefix in the concatenation, which may be uninitialized for
    153     /// Null or Empty kinds.
    154     Child LHS;
    155     /// RHS - The suffix in the concatenation, which may be uninitialized for
    156     /// Null or Empty kinds.
    157     Child RHS;
    158     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
    159     NodeKind LHSKind;
    160     /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
    161     NodeKind RHSKind;
    162 
    163   private:
    164     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
    165     explicit Twine(NodeKind Kind)
    166       : LHSKind(Kind), RHSKind(EmptyKind) {
    167       assert(isNullary() && "Invalid kind!");
    168     }
    169 
    170     /// Construct a binary twine.
    171     explicit Twine(const Twine &LHS, const Twine &RHS)
    172         : LHSKind(TwineKind), RHSKind(TwineKind) {
    173       this->LHS.twine = &LHS;
    174       this->RHS.twine = &RHS;
    175       assert(isValid() && "Invalid twine!");
    176     }
    177 
    178     /// Construct a twine from explicit values.
    179     explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
    180         : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
    181       assert(isValid() && "Invalid twine!");
    182     }
    183 
    184     /// Since the intended use of twines is as temporary objects, assignments
    185     /// when concatenating might cause undefined behavior or stack corruptions
    186     Twine &operator=(const Twine &Other) = delete;
    187 
    188     /// Check for the null twine.
    189     bool isNull() const {
    190       return getLHSKind() == NullKind;
    191     }
    192 
    193     /// Check for the empty twine.
    194     bool isEmpty() const {
    195       return getLHSKind() == EmptyKind;
    196     }
    197 
    198     /// Check if this is a nullary twine (null or empty).
    199     bool isNullary() const {
    200       return isNull() || isEmpty();
    201     }
    202 
    203     /// Check if this is a unary twine.
    204     bool isUnary() const {
    205       return getRHSKind() == EmptyKind && !isNullary();
    206     }
    207 
    208     /// Check if this is a binary twine.
    209     bool isBinary() const {
    210       return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
    211     }
    212 
    213     /// Check if this is a valid twine (satisfying the invariants on
    214     /// order and number of arguments).
    215     bool isValid() const {
    216       // Nullary twines always have Empty on the RHS.
    217       if (isNullary() && getRHSKind() != EmptyKind)
    218         return false;
    219 
    220       // Null should never appear on the RHS.
    221       if (getRHSKind() == NullKind)
    222         return false;
    223 
    224       // The RHS cannot be non-empty if the LHS is empty.
    225       if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
    226         return false;
    227 
    228       // A twine child should always be binary.
    229       if (getLHSKind() == TwineKind &&
    230           !LHS.twine->isBinary())
    231         return false;
    232       if (getRHSKind() == TwineKind &&
    233           !RHS.twine->isBinary())
    234         return false;
    235 
    236       return true;
    237     }
    238 
    239     /// Get the NodeKind of the left-hand side.
    240     NodeKind getLHSKind() const { return LHSKind; }
    241 
    242     /// Get the NodeKind of the right-hand side.
    243     NodeKind getRHSKind() const { return RHSKind; }
    244 
    245     /// Print one child from a twine.
    246     void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
    247 
    248     /// Print the representation of one child from a twine.
    249     void printOneChildRepr(raw_ostream &OS, Child Ptr,
    250                            NodeKind Kind) const;
    251 
    252   public:
    253     /// @name Constructors
    254     /// @{
    255 
    256     /// Construct from an empty string.
    257     /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) {
    258       assert(isValid() && "Invalid twine!");
    259     }
    260 
    261     Twine(const Twine &) = default;
    262 
    263     /// Construct from a C string.
    264     ///
    265     /// We take care here to optimize "" into the empty twine -- this will be
    266     /// optimized out for string constants. This allows Twine arguments have
    267     /// default "" values, without introducing unnecessary string constants.
    268     /*implicit*/ Twine(const char *Str)
    269       : RHSKind(EmptyKind) {
    270       if (Str[0] != '\0') {
    271         LHS.cString = Str;
    272         LHSKind = CStringKind;
    273       } else
    274         LHSKind = EmptyKind;
    275 
    276       assert(isValid() && "Invalid twine!");
    277     }
    278 
    279     /// Construct from an std::string.
    280     /*implicit*/ Twine(const std::string &Str)
    281       : LHSKind(StdStringKind), RHSKind(EmptyKind) {
    282       LHS.stdString = &Str;
    283       assert(isValid() && "Invalid twine!");
    284     }
    285 
    286     /// Construct from a StringRef.
    287     /*implicit*/ Twine(const StringRef &Str)
    288       : LHSKind(StringRefKind), RHSKind(EmptyKind) {
    289       LHS.stringRef = &Str;
    290       assert(isValid() && "Invalid twine!");
    291     }
    292 
    293     /// Construct from a SmallString.
    294     /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
    295       : LHSKind(SmallStringKind), RHSKind(EmptyKind) {
    296       LHS.smallString = &Str;
    297       assert(isValid() && "Invalid twine!");
    298     }
    299 
    300     /// Construct from a char.
    301     explicit Twine(char Val)
    302       : LHSKind(CharKind), RHSKind(EmptyKind) {
    303       LHS.character = Val;
    304     }
    305 
    306     /// Construct from a signed char.
    307     explicit Twine(signed char Val)
    308       : LHSKind(CharKind), RHSKind(EmptyKind) {
    309       LHS.character = static_cast<char>(Val);
    310     }
    311 
    312     /// Construct from an unsigned char.
    313     explicit Twine(unsigned char Val)
    314       : LHSKind(CharKind), RHSKind(EmptyKind) {
    315       LHS.character = static_cast<char>(Val);
    316     }
    317 
    318     /// Construct a twine to print \p Val as an unsigned decimal integer.
    319     explicit Twine(unsigned Val)
    320       : LHSKind(DecUIKind), RHSKind(EmptyKind) {
    321       LHS.decUI = Val;
    322     }
    323 
    324     /// Construct a twine to print \p Val as a signed decimal integer.
    325     explicit Twine(int Val)
    326       : LHSKind(DecIKind), RHSKind(EmptyKind) {
    327       LHS.decI = Val;
    328     }
    329 
    330     /// Construct a twine to print \p Val as an unsigned decimal integer.
    331     explicit Twine(const unsigned long &Val)
    332       : LHSKind(DecULKind), RHSKind(EmptyKind) {
    333       LHS.decUL = &Val;
    334     }
    335 
    336     /// Construct a twine to print \p Val as a signed decimal integer.
    337     explicit Twine(const long &Val)
    338       : LHSKind(DecLKind), RHSKind(EmptyKind) {
    339       LHS.decL = &Val;
    340     }
    341 
    342     /// Construct a twine to print \p Val as an unsigned decimal integer.
    343     explicit Twine(const unsigned long long &Val)
    344       : LHSKind(DecULLKind), RHSKind(EmptyKind) {
    345       LHS.decULL = &Val;
    346     }
    347 
    348     /// Construct a twine to print \p Val as a signed decimal integer.
    349     explicit Twine(const long long &Val)
    350       : LHSKind(DecLLKind), RHSKind(EmptyKind) {
    351       LHS.decLL = &Val;
    352     }
    353 
    354     // FIXME: Unfortunately, to make sure this is as efficient as possible we
    355     // need extra binary constructors from particular types. We can't rely on
    356     // the compiler to be smart enough to fold operator+()/concat() down to the
    357     // right thing. Yet.
    358 
    359     /// Construct as the concatenation of a C string and a StringRef.
    360     /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
    361         : LHSKind(CStringKind), RHSKind(StringRefKind) {
    362       this->LHS.cString = LHS;
    363       this->RHS.stringRef = &RHS;
    364       assert(isValid() && "Invalid twine!");
    365     }
    366 
    367     /// Construct as the concatenation of a StringRef and a C string.
    368     /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
    369         : LHSKind(StringRefKind), RHSKind(CStringKind) {
    370       this->LHS.stringRef = &LHS;
    371       this->RHS.cString = RHS;
    372       assert(isValid() && "Invalid twine!");
    373     }
    374 
    375     /// Create a 'null' string, which is an empty string that always
    376     /// concatenates to form another empty string.
    377     static Twine createNull() {
    378       return Twine(NullKind);
    379     }
    380 
    381     /// @}
    382     /// @name Numeric Conversions
    383     /// @{
    384 
    385     // Construct a twine to print \p Val as an unsigned hexadecimal integer.
    386     static Twine utohexstr(const uint64_t &Val) {
    387       Child LHS, RHS;
    388       LHS.uHex = &Val;
    389       RHS.twine = nullptr;
    390       return Twine(LHS, UHexKind, RHS, EmptyKind);
    391     }
    392 
    393     /// @}
    394     /// @name Predicate Operations
    395     /// @{
    396 
    397     /// Check if this twine is trivially empty; a false return value does not
    398     /// necessarily mean the twine is empty.
    399     bool isTriviallyEmpty() const {
    400       return isNullary();
    401     }
    402 
    403     /// Return true if this twine can be dynamically accessed as a single
    404     /// StringRef value with getSingleStringRef().
    405     bool isSingleStringRef() const {
    406       if (getRHSKind() != EmptyKind) return false;
    407 
    408       switch (getLHSKind()) {
    409       case EmptyKind:
    410       case CStringKind:
    411       case StdStringKind:
    412       case StringRefKind:
    413       case SmallStringKind:
    414         return true;
    415       default:
    416         return false;
    417       }
    418     }
    419 
    420     /// @}
    421     /// @name String Operations
    422     /// @{
    423 
    424     Twine concat(const Twine &Suffix) const;
    425 
    426     /// @}
    427     /// @name Output & Conversion.
    428     /// @{
    429 
    430     /// Return the twine contents as a std::string.
    431     std::string str() const;
    432 
    433     /// Append the concatenated string into the given SmallString or SmallVector.
    434     void toVector(SmallVectorImpl<char> &Out) const;
    435 
    436     /// This returns the twine as a single StringRef.  This method is only valid
    437     /// if isSingleStringRef() is true.
    438     StringRef getSingleStringRef() const {
    439       assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
    440       switch (getLHSKind()) {
    441       default: llvm_unreachable("Out of sync with isSingleStringRef");
    442       case EmptyKind:      return StringRef();
    443       case CStringKind:    return StringRef(LHS.cString);
    444       case StdStringKind:  return StringRef(*LHS.stdString);
    445       case StringRefKind:  return *LHS.stringRef;
    446       case SmallStringKind:
    447         return StringRef(LHS.smallString->data(), LHS.smallString->size());
    448       }
    449     }
    450 
    451     /// This returns the twine as a single StringRef if it can be
    452     /// represented as such. Otherwise the twine is written into the given
    453     /// SmallVector and a StringRef to the SmallVector's data is returned.
    454     StringRef toStringRef(SmallVectorImpl<char> &Out) const {
    455       if (isSingleStringRef())
    456         return getSingleStringRef();
    457       toVector(Out);
    458       return StringRef(Out.data(), Out.size());
    459     }
    460 
    461     /// This returns the twine as a single null terminated StringRef if it
    462     /// can be represented as such. Otherwise the twine is written into the
    463     /// given SmallVector and a StringRef to the SmallVector's data is returned.
    464     ///
    465     /// The returned StringRef's size does not include the null terminator.
    466     StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
    467 
    468     /// Write the concatenated string represented by this twine to the
    469     /// stream \p OS.
    470     void print(raw_ostream &OS) const;
    471 
    472     /// Dump the concatenated string represented by this twine to stderr.
    473     void dump() const;
    474 
    475     /// Write the representation of this twine to the stream \p OS.
    476     void printRepr(raw_ostream &OS) const;
    477 
    478     /// Dump the representation of this twine to stderr.
    479     void dumpRepr() const;
    480 
    481     /// @}
    482   };
    483 
    484   /// @name Twine Inline Implementations
    485   /// @{
    486 
    487   inline Twine Twine::concat(const Twine &Suffix) const {
    488     // Concatenation with null is null.
    489     if (isNull() || Suffix.isNull())
    490       return Twine(NullKind);
    491 
    492     // Concatenation with empty yields the other side.
    493     if (isEmpty())
    494       return Suffix;
    495     if (Suffix.isEmpty())
    496       return *this;
    497 
    498     // Otherwise we need to create a new node, taking care to fold in unary
    499     // twines.
    500     Child NewLHS, NewRHS;
    501     NewLHS.twine = this;
    502     NewRHS.twine = &Suffix;
    503     NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
    504     if (isUnary()) {
    505       NewLHS = LHS;
    506       NewLHSKind = getLHSKind();
    507     }
    508     if (Suffix.isUnary()) {
    509       NewRHS = Suffix.LHS;
    510       NewRHSKind = Suffix.getLHSKind();
    511     }
    512 
    513     return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
    514   }
    515 
    516   inline Twine operator+(const Twine &LHS, const Twine &RHS) {
    517     return LHS.concat(RHS);
    518   }
    519 
    520   /// Additional overload to guarantee simplified codegen; this is equivalent to
    521   /// concat().
    522 
    523   inline Twine operator+(const char *LHS, const StringRef &RHS) {
    524     return Twine(LHS, RHS);
    525   }
    526 
    527   /// Additional overload to guarantee simplified codegen; this is equivalent to
    528   /// concat().
    529 
    530   inline Twine operator+(const StringRef &LHS, const char *RHS) {
    531     return Twine(LHS, RHS);
    532   }
    533 
    534   inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
    535     RHS.print(OS);
    536     return OS;
    537   }
    538 
    539   /// @}
    540 }
    541 
    542 #endif
    543