Home | History | Annotate | Download | only in ADT
      1 //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_ADT_STRINGREF_H
     11 #define LLVM_ADT_STRINGREF_H
     12 
     13 #include "llvm/ADT/STLExtras.h"
     14 #include "llvm/ADT/iterator_range.h"
     15 #include "llvm/Support/Compiler.h"
     16 #include <algorithm>
     17 #include <cassert>
     18 #include <cstring>
     19 #include <limits>
     20 #include <string>
     21 #include <utility>
     22 
     23 namespace llvm {
     24   template <typename T>
     25   class SmallVectorImpl;
     26   class APInt;
     27   class hash_code;
     28   class StringRef;
     29 
     30   /// Helper functions for StringRef::getAsInteger.
     31   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
     32                             unsigned long long &Result);
     33 
     34   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
     35 
     36   bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
     37                               unsigned long long &Result);
     38   bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
     39 
     40   /// StringRef - Represent a constant reference to a string, i.e. a character
     41   /// array and a length, which need not be null terminated.
     42   ///
     43   /// This class does not own the string data, it is expected to be used in
     44   /// situations where the character data resides in some other buffer, whose
     45   /// lifetime extends past that of the StringRef. For this reason, it is not in
     46   /// general safe to store a StringRef.
     47   class StringRef {
     48   public:
     49     typedef const char *iterator;
     50     typedef const char *const_iterator;
     51     static const size_t npos = ~size_t(0);
     52     typedef size_t size_type;
     53 
     54   private:
     55     /// The start of the string, in an external buffer.
     56     const char *Data = nullptr;
     57 
     58     /// The length of the string.
     59     size_t Length = 0;
     60 
     61     // Workaround memcmp issue with null pointers (undefined behavior)
     62     // by providing a specialized version
     63     LLVM_ATTRIBUTE_ALWAYS_INLINE
     64     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
     65       if (Length == 0) { return 0; }
     66       return ::memcmp(Lhs,Rhs,Length);
     67     }
     68 
     69   public:
     70     /// @name Constructors
     71     /// @{
     72 
     73     /// Construct an empty string ref.
     74     /*implicit*/ StringRef() = default;
     75 
     76     /// Disable conversion from nullptr.  This prevents things like
     77     /// if (S == nullptr)
     78     StringRef(std::nullptr_t) = delete;
     79 
     80     /// Construct a string ref from a cstring.
     81     LLVM_ATTRIBUTE_ALWAYS_INLINE
     82     /*implicit*/ StringRef(const char *Str)
     83         : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
     84 
     85     /// Construct a string ref from a pointer and length.
     86     LLVM_ATTRIBUTE_ALWAYS_INLINE
     87     /*implicit*/ constexpr StringRef(const char *data, size_t length)
     88         : Data(data), Length(length) {}
     89 
     90     /// Construct a string ref from an std::string.
     91     LLVM_ATTRIBUTE_ALWAYS_INLINE
     92     /*implicit*/ StringRef(const std::string &Str)
     93       : Data(Str.data()), Length(Str.length()) {}
     94 
     95     static StringRef withNullAsEmpty(const char *data) {
     96       return StringRef(data ? data : "");
     97     }
     98 
     99     /// @}
    100     /// @name Iterators
    101     /// @{
    102 
    103     iterator begin() const { return Data; }
    104 
    105     iterator end() const { return Data + Length; }
    106 
    107     const unsigned char *bytes_begin() const {
    108       return reinterpret_cast<const unsigned char *>(begin());
    109     }
    110     const unsigned char *bytes_end() const {
    111       return reinterpret_cast<const unsigned char *>(end());
    112     }
    113     iterator_range<const unsigned char *> bytes() const {
    114       return make_range(bytes_begin(), bytes_end());
    115     }
    116 
    117     /// @}
    118     /// @name String Operations
    119     /// @{
    120 
    121     /// data - Get a pointer to the first character of the referenced buffer.
    122     /// The buffer is not guaranteed to be null terminated; pair with size().
    123     LLVM_NODISCARD
    124     LLVM_ATTRIBUTE_ALWAYS_INLINE
    125     const char *data() const { return Data; }
    126 
    127     /// empty - Check if the string is empty.
    128     LLVM_NODISCARD
    129     LLVM_ATTRIBUTE_ALWAYS_INLINE
    130     bool empty() const { return Length == 0; }
    131 
    132     /// size - Get the length of the string in characters (the stored Length).
    133     LLVM_NODISCARD
    134     LLVM_ATTRIBUTE_ALWAYS_INLINE
    135     size_t size() const { return Length; }
    136 
    137     /// front - Get the first character in the string.
    138     LLVM_NODISCARD
    139     char front() const {
    140       assert(!empty());
    141       return Data[0];
    142     }
    143 
    144     /// back - Get the last character in the string.
    145     LLVM_NODISCARD
    146     char back() const {
    147       assert(!empty());
    148       return Data[Length-1];
    149     }
    150 
    151     // copy - Allocate copy in Allocator and return StringRef to it.
    152     template <typename Allocator>
    153     LLVM_NODISCARD StringRef copy(Allocator &A) const {
    154       // Don't request a length 0 copy from the allocator.
    155       if (empty())
    156         return StringRef();
    157       char *S = A.template Allocate<char>(Length);
    158       std::copy(begin(), end(), S);
    159       return StringRef(S, Length);
    160     }
    161 
    162     /// equals - Check for string equality, this is more efficient than
    163     /// compare() when the relative ordering of inequal strings isn't needed.
    164     LLVM_NODISCARD
    165     LLVM_ATTRIBUTE_ALWAYS_INLINE
    166     bool equals(StringRef RHS) const {
    167       return (Length == RHS.Length &&
    168               compareMemory(Data, RHS.Data, RHS.Length) == 0);
    169     }
    170 
    171     /// equals_lower - Check for string equality, ignoring case.
    172     LLVM_NODISCARD
    173     bool equals_lower(StringRef RHS) const {
    174       return Length == RHS.Length && compare_lower(RHS) == 0;
    175     }
    176 
    177     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
    178     /// is lexicographically less than, equal to, or greater than the \p RHS.
    179     LLVM_NODISCARD
    180     LLVM_ATTRIBUTE_ALWAYS_INLINE
    181     int compare(StringRef RHS) const {
    182       // Check the prefix for a mismatch.
    183       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
    184         return Res < 0 ? -1 : 1;
    185 
    186       // Otherwise the prefixes match, so we only need to check the lengths.
    187       if (Length == RHS.Length)
    188         return 0;
    189       return Length < RHS.Length ? -1 : 1;
    190     }
    191 
    192     /// compare_lower - Compare two strings, ignoring case.
    193     LLVM_NODISCARD
    194     int compare_lower(StringRef RHS) const;
    195 
    196     /// compare_numeric - Compare two strings, treating sequences of digits as
    197     /// numbers.
    198     LLVM_NODISCARD
    199     int compare_numeric(StringRef RHS) const;
    200 
    201     /// \brief Determine the edit distance between this string and another
    202     /// string.
    203     ///
    204     /// \param Other the string to compare this string against.
    205     ///
    206     /// \param AllowReplacements whether to allow character
    207     /// replacements (change one character into another) as a single
    208     /// operation, rather than as two operations (an insertion and a
    209     /// removal).
    210     ///
    211     /// \param MaxEditDistance If non-zero, the maximum edit distance that
    212     /// this routine is allowed to compute. If the edit distance will exceed
    213     /// that maximum, returns \c MaxEditDistance+1.
    214     ///
    215     /// \returns the minimum number of character insertions, removals,
    216     /// or (if \p AllowReplacements is \c true) replacements needed to
    217     /// transform one of the given strings into the other. If zero,
    218     /// the strings are identical.
    219     LLVM_NODISCARD
    220     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
    221                            unsigned MaxEditDistance = 0) const;
    222 
    223     /// str - Get the contents as an std::string.
    224     LLVM_NODISCARD
    225     std::string str() const {
    226       if (!Data) return std::string();
    227       return std::string(Data, Length);
    228     }
    229 
    230     /// @}
    231     /// @name Operator Overloads
    232     /// @{
    233 
    234     LLVM_NODISCARD
    235     char operator[](size_t Index) const {
    236       assert(Index < Length && "Invalid index!");
    237       return Data[Index];
    238     }
    239 
    240     /// Disallow accidental assignment from a temporary std::string.
    241     ///
    242     /// The declaration here is extra complicated so that `stringRef = {}`
    243     /// and `stringRef = "abc"` continue to select the move assignment operator.
    244     template <typename T>
    245     typename std::enable_if<std::is_same<T, std::string>::value,
    246                             StringRef>::type &
    247     operator=(T &&Str) = delete;
    248 
    249     /// @}
    250     /// @name Type Conversions
    251     /// @{
    252 
    253     operator std::string() const {
    254       return str();
    255     }
    256 
    257     /// @}
    258     /// @name String Predicates
    259     /// @{
    260 
    261     /// Check if this string starts with the given \p Prefix.
    262     LLVM_NODISCARD
    263     LLVM_ATTRIBUTE_ALWAYS_INLINE
    264     bool startswith(StringRef Prefix) const {
    265       return Length >= Prefix.Length &&
    266              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
    267     }
    268 
    269     /// Check if this string starts with the given \p Prefix, ignoring case.
    270     LLVM_NODISCARD
    271     bool startswith_lower(StringRef Prefix) const;
    272 
    273     /// Check if this string ends with the given \p Suffix.
    274     LLVM_NODISCARD
    275     LLVM_ATTRIBUTE_ALWAYS_INLINE
    276     bool endswith(StringRef Suffix) const {
    277       return Length >= Suffix.Length &&
    278         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
    279     }
    280 
    281     /// Check if this string ends with the given \p Suffix, ignoring case.
    282     LLVM_NODISCARD
    283     bool endswith_lower(StringRef Suffix) const;
    284 
    285     /// @}
    286     /// @name String Searching
    287     /// @{
    288 
    289     /// Search for the first character \p C in the string.
    290     ///
    291     /// \returns The index of the first occurrence of \p C, or npos if not
    292     /// found.
    293     LLVM_NODISCARD
    294     LLVM_ATTRIBUTE_ALWAYS_INLINE
    295     size_t find(char C, size_t From = 0) const {
    296       size_t FindBegin = std::min(From, Length);
    297       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
    298         // Just forward to memchr, which is faster than a hand-rolled loop.
    299         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
    300           return static_cast<const char *>(P) - Data;
    301       }
    302       return npos;
    303     }
    304 
    305     /// Search for the first character \p C in the string, ignoring case.
    306     ///
    307     /// \returns The index of the first occurrence of \p C, or npos if not
    308     /// found.
    309     LLVM_NODISCARD
    310     size_t find_lower(char C, size_t From = 0) const;
    311 
    312     /// Search for the first character satisfying the predicate \p F
    313     ///
    314     /// \returns The index of the first character satisfying \p F starting from
    315     /// \p From, or npos if not found.
    316     LLVM_NODISCARD
    317     LLVM_ATTRIBUTE_ALWAYS_INLINE
    318     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
    319       StringRef S = drop_front(From);
    320       while (!S.empty()) {
    321         if (F(S.front()))
    322           return size() - S.size();
    323         S = S.drop_front();
    324       }
    325       return npos;
    326     }
    327 
    328     /// Search for the first character not satisfying the predicate \p F
    329     ///
    330     /// \returns The index of the first character not satisfying \p F starting
    331     /// from \p From, or npos if not found.
    332     LLVM_NODISCARD
    333     LLVM_ATTRIBUTE_ALWAYS_INLINE
    334     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
    335       return find_if([F](char c) { return !F(c); }, From);
    336     }
    337 
    338     /// Search for the first string \p Str in the string.
    339     ///
    340     /// \returns The index of the first occurrence of \p Str, or npos if not
    341     /// found.
    342     LLVM_NODISCARD
    343     size_t find(StringRef Str, size_t From = 0) const;
    344 
    345     /// Search for the first string \p Str in the string, ignoring case.
    346     ///
    347     /// \returns The index of the first occurrence of \p Str, or npos if not
    348     /// found.
    349     LLVM_NODISCARD
    350     size_t find_lower(StringRef Str, size_t From = 0) const;
    351 
    352     /// Search for the last character \p C in the string.
    353     ///
    354     /// \returns The index of the last occurrence of \p C, or npos if not
    355     /// found.
    356     LLVM_NODISCARD
    357     size_t rfind(char C, size_t From = npos) const {
    358       From = std::min(From, Length);
    359       size_t i = From;
    360       while (i != 0) {
    361         --i;
    362         if (Data[i] == C)
    363           return i;
    364       }
    365       return npos;
    366     }
    367 
    368     /// Search for the last character \p C in the string, ignoring case.
    369     ///
    370     /// \returns The index of the last occurrence of \p C, or npos if not
    371     /// found.
    372     LLVM_NODISCARD
    373     size_t rfind_lower(char C, size_t From = npos) const;
    374 
    375     /// Search for the last string \p Str in the string.
    376     ///
    377     /// \returns The index of the last occurrence of \p Str, or npos if not
    378     /// found.
    379     LLVM_NODISCARD
    380     size_t rfind(StringRef Str) const;
    381 
    382     /// Search for the last string \p Str in the string, ignoring case.
    383     ///
    384     /// \returns The index of the last occurrence of \p Str, or npos if not
    385     /// found.
    386     LLVM_NODISCARD
    387     size_t rfind_lower(StringRef Str) const;
    388 
    389     /// Find the first occurrence of the character \p C at or after index
    390     /// \p From, or npos if not found. Simply forwards to find(C, From).
    391     LLVM_NODISCARD
    392     size_t find_first_of(char C, size_t From = 0) const {
    393       return find(C, From);
    394     }
    395 
    396     /// Find the first character in the string that is in \p Chars, or npos if
    397     /// not found.
    398     ///
    399     /// Complexity: O(size() + Chars.size())
    400     LLVM_NODISCARD
    401     size_t find_first_of(StringRef Chars, size_t From = 0) const;
    402 
    403     /// Find the first character in the string that is not \p C or npos if not
    404     /// found.
    405     LLVM_NODISCARD
    406     size_t find_first_not_of(char C, size_t From = 0) const;
    407 
    408     /// Find the first character in the string that is not in the string
    409     /// \p Chars, or npos if not found.
    410     ///
    411     /// Complexity: O(size() + Chars.size())
    412     LLVM_NODISCARD
    413     size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
    414 
    415     /// Find the last occurrence of the character \p C, or npos if not
    416     /// found. Simply forwards to rfind(C, From).
    417     LLVM_NODISCARD
    418     size_t find_last_of(char C, size_t From = npos) const {
    419       return rfind(C, From);
    420     }
    421 
    422     /// Find the last character in the string that is in \p Chars, or npos if
    423     /// not found.
    424     ///
    425     /// Complexity: O(size() + Chars.size())
    426     LLVM_NODISCARD
    427     size_t find_last_of(StringRef Chars, size_t From = npos) const;
    428 
    429     /// Find the last character in the string that is not \p C, or npos if not
    430     /// found.
    431     LLVM_NODISCARD
    432     size_t find_last_not_of(char C, size_t From = npos) const;
    433 
    434     /// Find the last character in the string that is not in \p Chars, or
    435     /// npos if not found.
    436     ///
    437     /// Complexity: O(size() + Chars.size())
    438     LLVM_NODISCARD
    439     size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
    440 
    441     /// Return true if the given string is a substring of *this, and false
    442     /// otherwise.
    443     LLVM_NODISCARD
    444     LLVM_ATTRIBUTE_ALWAYS_INLINE
    445     bool contains(StringRef Other) const { return find(Other) != npos; }
    446 
    447     /// Return true if the given character is contained in *this, and false
    448     /// otherwise.
    449     LLVM_NODISCARD
    450     LLVM_ATTRIBUTE_ALWAYS_INLINE
    451     bool contains(char C) const { return find_first_of(C) != npos; }
    452 
    453     /// Return true if the given string is a substring of *this, and false
    454     /// otherwise.
    455     LLVM_NODISCARD
    456     LLVM_ATTRIBUTE_ALWAYS_INLINE
    457     bool contains_lower(StringRef Other) const {
    458       return find_lower(Other) != npos;
    459     }
    460 
    461     /// Return true if the given character is contained in *this, and false
    462     /// otherwise.
    463     LLVM_NODISCARD
    464     LLVM_ATTRIBUTE_ALWAYS_INLINE
    465     bool contains_lower(char C) const { return find_lower(C) != npos; }
    466 
    467     /// @}
    468     /// @name Helpful Algorithms
    469     /// @{
    470 
    471     /// Return the number of occurrences of \p C in the string.
    472     LLVM_NODISCARD
    473     size_t count(char C) const {
    474       size_t Count = 0;
    475       for (size_t i = 0, e = Length; i != e; ++i)
    476         if (Data[i] == C)
    477           ++Count;
    478       return Count;
    479     }
    480 
    481     /// Return the number of non-overlapped occurrences of \p Str in
    482     /// the string.
    483     size_t count(StringRef Str) const;
    484 
    485     /// Parse the current string as an integer of the specified radix.  If
    486     /// \p Radix is specified as zero, this does radix autosensing using
    487     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
    488     ///
    489     /// If the string is invalid or if only a subset of the string is valid,
    490     /// this returns true to signify the error.  The string is considered
    491     /// erroneous if empty or if it overflows T.
    492     template <typename T>
    493     typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
    494     getAsInteger(unsigned Radix, T &Result) const {
    495       long long LLVal;
    496       if (getAsSignedInteger(*this, Radix, LLVal) ||
    497             static_cast<T>(LLVal) != LLVal)
    498         return true;
    499       Result = LLVal;
    500       return false;
    501     }
    502 
    503     template <typename T>
    504     typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
    505     getAsInteger(unsigned Radix, T &Result) const {
    506       unsigned long long ULLVal;
    507       // The additional cast to unsigned long long is required to avoid the
    508       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
    509       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
    510       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
    511           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
    512         return true;
    513       Result = ULLVal;
    514       return false;
    515     }
    516 
    517     /// Parse the current string as an integer of the specified radix.  If
    518     /// \p Radix is specified as zero, this does radix autosensing using
    519     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
    520     ///
    521     /// If the string does not begin with a number of the specified radix,
    522     /// this returns true to signify the error. The string is considered
    523     /// erroneous if empty or if it overflows T.
    524     /// The portion of the string representing the discovered numeric value
    525     /// is removed from the beginning of the string.
    526     template <typename T>
    527     typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
    528     consumeInteger(unsigned Radix, T &Result) {
    529       long long LLVal;
    530       if (consumeSignedInteger(*this, Radix, LLVal) ||
    531           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
    532         return true;
    533       Result = LLVal;
    534       return false;
    535     }
    536 
    537     template <typename T>
    538     typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
    539     consumeInteger(unsigned Radix, T &Result) {
    540       unsigned long long ULLVal;
    541       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
    542           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
    543         return true;
    544       Result = ULLVal;
    545       return false;
    546     }
    547 
    548     /// Parse the current string as an integer of the specified \p Radix, or of
    549     /// an autosensed radix if the \p Radix given is 0.  The current value in
    550     /// \p Result is discarded, and the storage is changed to be wide enough to
    551     /// store the parsed integer.
    552     ///
    553     /// \returns true if the string does not solely consist of a valid
    554     /// non-empty number in the appropriate base.
    555     ///
    556     /// APInt::fromString is superficially similar but assumes the
    557     /// string is well-formed in the given radix.
    558     bool getAsInteger(unsigned Radix, APInt &Result) const;
    559 
    560     /// @}
    561     /// @name String Operations
    562     /// @{
    563 
    564     /// Convert the given ASCII string to lowercase.
    565     LLVM_NODISCARD
    566     std::string lower() const;
    567 
    568     /// Convert the given ASCII string to uppercase.
    569     LLVM_NODISCARD
    570     std::string upper() const;
    571 
    572     /// @}
    573     /// @name Substring Operations
    574     /// @{
    575 
    576     /// Return a reference to the substring from [Start, Start + N).
    577     ///
    578     /// \param Start The index of the starting character in the substring; if
    579     /// the index is npos or greater than the length of the string then the
    580     /// empty substring will be returned.
    581     ///
    582     /// \param N The number of characters to included in the substring. If N
    583     /// exceeds the number of characters remaining in the string, the string
    584     /// suffix (starting with \p Start) will be returned.
    585     LLVM_NODISCARD
    586     LLVM_ATTRIBUTE_ALWAYS_INLINE
    587     StringRef substr(size_t Start, size_t N = npos) const {
    588       Start = std::min(Start, Length);
    589       return StringRef(Data + Start, std::min(N, Length - Start));
    590     }
    591 
    592     /// Return a StringRef equal to 'this' but with only the first \p N
    593     /// elements remaining.  If \p N is greater than the length of the
    594     /// string, the entire string is returned.
    595     LLVM_NODISCARD
    596     LLVM_ATTRIBUTE_ALWAYS_INLINE
    597     StringRef take_front(size_t N = 1) const {
    598       if (N >= size())
    599         return *this;
    600       return drop_back(size() - N);
    601     }
    602 
    603     /// Return a StringRef equal to 'this' but with only the first \p N
    604     /// elements remaining.  If \p N is greater than the length of the
    605     /// string, the entire string is returned.
    606     LLVM_NODISCARD
    607     LLVM_ATTRIBUTE_ALWAYS_INLINE
    608     StringRef take_back(size_t N = 1) const {
    609       if (N >= size())
    610         return *this;
    611       return drop_front(size() - N);
    612     }
    613 
    614     /// Return the longest prefix of 'this' such that every character
    615     /// in the prefix satisfies the given predicate.
    616     LLVM_NODISCARD
    617     LLVM_ATTRIBUTE_ALWAYS_INLINE
    618     StringRef take_while(function_ref<bool(char)> F) const {
    619       return substr(0, find_if_not(F));
    620     }
    621 
    622     /// Return the longest prefix of 'this' such that no character in
    623     /// the prefix satisfies the given predicate.
    624     LLVM_NODISCARD
    625     LLVM_ATTRIBUTE_ALWAYS_INLINE
    626     StringRef take_until(function_ref<bool(char)> F) const {
    627       return substr(0, find_if(F));
    628     }
    629 
    630     /// Return a StringRef equal to 'this' but with the first \p N elements
    631     /// dropped.
    632     LLVM_NODISCARD
    633     LLVM_ATTRIBUTE_ALWAYS_INLINE
    634     StringRef drop_front(size_t N = 1) const {
    635       assert(size() >= N && "Dropping more elements than exist");
    636       return substr(N);
    637     }
    638 
    639     /// Return a StringRef equal to 'this' but with the last \p N elements
    640     /// dropped.
    641     LLVM_NODISCARD
    642     LLVM_ATTRIBUTE_ALWAYS_INLINE
    643     StringRef drop_back(size_t N = 1) const {
    644       assert(size() >= N && "Dropping more elements than exist");
    645       return substr(0, size()-N);
    646     }
    647 
    648     /// Return a StringRef equal to 'this', but with all characters satisfying
    649     /// the given predicate dropped from the beginning of the string.
    650     LLVM_NODISCARD
    651     LLVM_ATTRIBUTE_ALWAYS_INLINE
    652     StringRef drop_while(function_ref<bool(char)> F) const {
    653       return substr(find_if_not(F));
    654     }
    655 
    656     /// Return a StringRef equal to 'this', but with all characters not
    657     /// satisfying the given predicate dropped from the beginning of the string.
    658     LLVM_NODISCARD
    659     LLVM_ATTRIBUTE_ALWAYS_INLINE
    660     StringRef drop_until(function_ref<bool(char)> F) const {
    661       return substr(find_if(F));
    662     }
    663 
    664     /// Returns true if this StringRef has the given prefix and removes that
    665     /// prefix.
    666     LLVM_ATTRIBUTE_ALWAYS_INLINE
    667     bool consume_front(StringRef Prefix) {
    668       if (!startswith(Prefix))
    669         return false;
    670 
    671       *this = drop_front(Prefix.size());
    672       return true;
    673     }
    674 
    675     /// Returns true if this StringRef has the given suffix and removes that
    676     /// suffix.
    677     LLVM_ATTRIBUTE_ALWAYS_INLINE
    678     bool consume_back(StringRef Suffix) {
    679       if (!endswith(Suffix))
    680         return false;
    681 
    682       *this = drop_back(Suffix.size());
    683       return true;
    684     }
    685 
    686     /// Return a reference to the substring from [Start, End).
    687     ///
    688     /// \param Start The index of the starting character in the substring; if
    689     /// the index is npos or greater than the length of the string then the
    690     /// empty substring will be returned.
    691     ///
    692     /// \param End The index following the last character to include in the
    693     /// substring. If this is npos or exceeds the number of characters
    694     /// remaining in the string, the string suffix (starting with \p Start)
    695     /// will be returned. If this is less than \p Start, an empty string will
    696     /// be returned.
    697     LLVM_NODISCARD
    698     LLVM_ATTRIBUTE_ALWAYS_INLINE
    699     StringRef slice(size_t Start, size_t End) const {
    700       Start = std::min(Start, Length);
    701       End = std::min(std::max(Start, End), Length);
    702       return StringRef(Data + Start, End - Start);
    703     }
    704 
    705     /// Split into two substrings around the first occurrence of a separator
    706     /// character.
    707     ///
    708     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
    709     /// such that (*this == LHS + Separator + RHS) is true and RHS is
    710     /// maximal. If \p Separator is not in the string, then the result is a
    711     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    712     ///
    713     /// \param Separator The character to split on.
    714     /// \returns The split substrings.
    715     LLVM_NODISCARD
    716     std::pair<StringRef, StringRef> split(char Separator) const {
    717       size_t Idx = find(Separator);
    718       if (Idx == npos)
    719         return std::make_pair(*this, StringRef());
    720       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    721     }
    722 
    723     /// Split into two substrings around the first occurrence of a separator
    724     /// string.
    725     ///
    726     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
    727     /// such that (*this == LHS + Separator + RHS) is true and RHS is
    728     /// maximal. If \p Separator is not in the string, then the result is a
    729     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    730     ///
    731     /// \param Separator - The string to split on.
    732     /// \return - The split substrings.
    733     LLVM_NODISCARD
    734     std::pair<StringRef, StringRef> split(StringRef Separator) const {
    735       size_t Idx = find(Separator);
    736       if (Idx == npos)
    737         return std::make_pair(*this, StringRef());
    738       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
    739     }
    740 
    741     /// Split into substrings around the occurrences of a separator string.
    742     ///
    743     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    744     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    745     /// elements are added to A.
    746     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    747     /// still count when considering \p MaxSplit.
    748     /// A useful invariant is that
    749     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    750     ///
    751     /// \param A - Where to put the substrings.
    752     /// \param Separator - The string to split on.
    753     /// \param MaxSplit - The maximum number of times the string is split.
    754     /// \param KeepEmpty - True if empty substring should be added.
    755     void split(SmallVectorImpl<StringRef> &A,
    756                StringRef Separator, int MaxSplit = -1,
    757                bool KeepEmpty = true) const;
    758 
    759     /// Split into substrings around the occurrences of a separator character.
    760     ///
    761     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    762     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    763     /// elements are added to A.
    764     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    765     /// still count when considering \p MaxSplit
    766     /// An useful invariant is that
    767     /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    768     ///
    769     /// \param A - Where to put the substrings.
    770     /// \param Separator - The string to split on.
    771     /// \param MaxSplit - The maximum number of times the string is split.
    772     /// \param KeepEmpty - True if empty substring should be added.
    773     void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
    774                bool KeepEmpty = true) const;
    775 
    776     /// Split into two substrings around the last occurrence of a separator
    777     /// character.
    778     ///
    779     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
    780     /// such that (*this == LHS + Separator + RHS) is true and RHS is
    781     /// minimal. If \p Separator is not in the string, then the result is a
    782     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
    783     ///
    784     /// \param Separator - The character to split on.
    785     /// \return - The split substrings.
    786     LLVM_NODISCARD
    787     std::pair<StringRef, StringRef> rsplit(char Separator) const {
    788       size_t Idx = rfind(Separator);
    789       if (Idx == npos)
    790         return std::make_pair(*this, StringRef());
    791       return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
    792     }
    793 
    794     /// Return string with consecutive \p Char characters starting from the
    795     /// the left removed.
    796     LLVM_NODISCARD
    797     StringRef ltrim(char Char) const {
    798       return drop_front(std::min(Length, find_first_not_of(Char)));
    799     }
    800 
    801     /// Return string with consecutive characters in \p Chars starting from
    802     /// the left removed.
    803     LLVM_NODISCARD
    804     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
    805       return drop_front(std::min(Length, find_first_not_of(Chars)));
    806     }
    807 
    808     /// Return string with consecutive \p Char characters starting from the
    809     /// right removed.
    810     LLVM_NODISCARD
    811     StringRef rtrim(char Char) const {
    812       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
    813     }
    814 
    815     /// Return string with consecutive characters in \p Chars starting from
    816     /// the right removed.
    817     LLVM_NODISCARD
    818     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
    819       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
    820     }
    821 
    822     /// Return string with consecutive \p Char characters starting from the
    823     /// left and right removed.
    824     LLVM_NODISCARD
    825     StringRef trim(char Char) const {
    826       return ltrim(Char).rtrim(Char);
    827     }
    828 
    829     /// Return string with consecutive characters in \p Chars starting from
    830     /// the left and right removed.
    831     LLVM_NODISCARD
    832     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
    833       return ltrim(Chars).rtrim(Chars);
    834     }
    835 
    836     /// @}
    837   };
    838 
    839   /// A wrapper around a string literal that serves as a proxy for constructing
    840   /// global tables of StringRefs with the length computed at compile time.
    841   /// In order to avoid the invocation of a global constructor, StringLiteral
    842   /// should *only* be used in a constexpr context, as such:
    843   ///
    844   /// constexpr StringLiteral S("test");
    845   ///
    846   class StringLiteral : public StringRef {
    847   public:
    848     template <size_t N>
    849     constexpr StringLiteral(const char (&Str)[N])
    850 #if defined(__clang__) && __has_attribute(enable_if)
    851 #pragma clang diagnostic push
    852 #pragma clang diagnostic ignored "-Wgcc-compat"
    853         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
    854                                "invalid string literal")))
    855 #pragma clang diagnostic pop
    856 #endif
    857         : StringRef(Str, N - 1) {
    858     }
    859   };
    860 
    861   /// @name StringRef Comparison Operators
    862   /// @{
    863 
    864   LLVM_ATTRIBUTE_ALWAYS_INLINE
    865   inline bool operator==(StringRef LHS, StringRef RHS) {
    866     return LHS.equals(RHS);
    867   }
    868 
    869   LLVM_ATTRIBUTE_ALWAYS_INLINE
    870   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
    871 
    872   inline bool operator<(StringRef LHS, StringRef RHS) {
    873     return LHS.compare(RHS) == -1;
    874   }
    875 
    876   inline bool operator<=(StringRef LHS, StringRef RHS) {
    877     return LHS.compare(RHS) != 1;
    878   }
    879 
    880   inline bool operator>(StringRef LHS, StringRef RHS) {
    881     return LHS.compare(RHS) == 1;
    882   }
    883 
    884   inline bool operator>=(StringRef LHS, StringRef RHS) {
    885     return LHS.compare(RHS) != -1;
    886   }
    887 
    888   inline std::string &operator+=(std::string &buffer, StringRef string) {
    889     return buffer.append(string.data(), string.size());
    890   }
    891 
    892   /// @}
    893 
    894   /// \brief Compute a hash_code for a StringRef.
    895   LLVM_NODISCARD
    896   hash_code hash_value(StringRef S);
    897 
    898   // StringRefs can be treated like a POD type.
    899   template <typename T> struct isPodLike;
    900   template <> struct isPodLike<StringRef> { static const bool value = true; };
    901 }
    902 
    903 #endif
    904