Home | History | Annotate | Download | only in Core
      1 //===--- RewriteRope.h - Rope specialized for rewriter ----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines the RewriteRope class, which is a powerful string class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_REWRITEROPE_H
     15 #define LLVM_CLANG_REWRITEROPE_H
     16 
     17 #include "llvm/ADT/StringRef.h"
     18 #include "llvm/Support/Compiler.h"
     19 #include <cassert>
     20 #include <cstddef>
     21 #include <cstring>
     22 #include <iterator>
     23 
     24 namespace clang {
     25   //===--------------------------------------------------------------------===//
     26   // RopeRefCountString Class
     27   //===--------------------------------------------------------------------===//
     28 
     29   /// RopeRefCountString - This struct is allocated with 'new char[]' from the
     30   /// heap, and represents a reference counted chunk of string data.  When its
     31   /// ref count drops to zero, it is delete[]'d.  This is primarily managed
     32   /// through the RopePiece class below.
     33   struct RopeRefCountString {
     34     unsigned RefCount;
     35     char Data[1];  //  Variable sized.
     36 
     37     void addRef() {
     38       ++RefCount;
     39     }
     40 
     41     void dropRef() {
     42       if (--RefCount == 0)
     43         delete [] (char*)this;
     44     }
     45   };
     46 
     47   //===--------------------------------------------------------------------===//
     48   // RopePiece Class
     49   //===--------------------------------------------------------------------===//
     50 
     51   /// RopePiece - This class represents a view into a RopeRefCountString object.
     52   /// This allows references to string data to be efficiently chopped up and
     53   /// moved around without having to push around the string data itself.
     54   ///
     55   /// For example, we could have a 1M RopePiece and want to insert something
     56   /// into the middle of it.  To do this, we split it into two RopePiece objects
     57   /// that both refer to the same underlying RopeRefCountString (just with
     58   /// different offsets) which is a nice constant time operation.
     59   struct RopePiece {
     60     RopeRefCountString *StrData;
     61     unsigned StartOffs;
     62     unsigned EndOffs;
     63 
     64     RopePiece() : StrData(nullptr), StartOffs(0), EndOffs(0) {}
     65 
     66     RopePiece(RopeRefCountString *Str, unsigned Start, unsigned End)
     67       : StrData(Str), StartOffs(Start), EndOffs(End) {
     68       if (StrData)
     69         StrData->addRef();
     70     }
     71     RopePiece(const RopePiece &RP)
     72       : StrData(RP.StrData), StartOffs(RP.StartOffs), EndOffs(RP.EndOffs) {
     73       if (StrData)
     74         StrData->addRef();
     75     }
     76 
     77     ~RopePiece() {
     78       if (StrData)
     79         StrData->dropRef();
     80     }
     81 
     82     void operator=(const RopePiece &RHS) {
     83       if (StrData != RHS.StrData) {
     84         if (StrData)
     85           StrData->dropRef();
     86         StrData = RHS.StrData;
     87         if (StrData)
     88           StrData->addRef();
     89       }
     90       StartOffs = RHS.StartOffs;
     91       EndOffs = RHS.EndOffs;
     92     }
     93 
     94     const char &operator[](unsigned Offset) const {
     95       return StrData->Data[Offset+StartOffs];
     96     }
     97     char &operator[](unsigned Offset) {
     98       return StrData->Data[Offset+StartOffs];
     99     }
    100 
    101     unsigned size() const { return EndOffs-StartOffs; }
    102   };
    103 
    104   //===--------------------------------------------------------------------===//
    105   // RopePieceBTreeIterator Class
    106   //===--------------------------------------------------------------------===//
    107 
    108   /// RopePieceBTreeIterator - This class provides read-only forward iteration
    109   /// over bytes that are in a RopePieceBTree.  This first iterates over bytes
    110   /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf,
    111   /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree.
    112   class RopePieceBTreeIterator :
    113       public std::iterator<std::forward_iterator_tag, const char, ptrdiff_t> {
    114     /// CurNode - The current B+Tree node that we are inspecting.
    115     const void /*RopePieceBTreeLeaf*/ *CurNode;
    116     /// CurPiece - The current RopePiece in the B+Tree node that we're
    117     /// inspecting.
    118     const RopePiece *CurPiece;
    119     /// CurChar - The current byte in the RopePiece we are pointing to.
    120     unsigned CurChar;
    121   public:
    122     // begin iterator.
    123     RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N);
    124     // end iterator
    125     RopePieceBTreeIterator()
    126       : CurNode(nullptr), CurPiece(nullptr), CurChar(0) {}
    127 
    128     char operator*() const {
    129       return (*CurPiece)[CurChar];
    130     }
    131 
    132     bool operator==(const RopePieceBTreeIterator &RHS) const {
    133       return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar;
    134     }
    135     bool operator!=(const RopePieceBTreeIterator &RHS) const {
    136       return !operator==(RHS);
    137     }
    138 
    139     RopePieceBTreeIterator& operator++() {   // Preincrement
    140       if (CurChar+1 < CurPiece->size())
    141         ++CurChar;
    142       else
    143         MoveToNextPiece();
    144       return *this;
    145     }
    146     inline RopePieceBTreeIterator operator++(int) { // Postincrement
    147       RopePieceBTreeIterator tmp = *this; ++*this; return tmp;
    148     }
    149 
    150     llvm::StringRef piece() const {
    151       return llvm::StringRef(&(*CurPiece)[0], CurPiece->size());
    152     }
    153 
    154     void MoveToNextPiece();
    155   };
    156 
    157   //===--------------------------------------------------------------------===//
    158   // RopePieceBTree Class
    159   //===--------------------------------------------------------------------===//
    160 
    161   class RopePieceBTree {
    162     void /*RopePieceBTreeNode*/ *Root;
    163     void operator=(const RopePieceBTree &) LLVM_DELETED_FUNCTION;
    164   public:
    165     RopePieceBTree();
    166     RopePieceBTree(const RopePieceBTree &RHS);
    167     ~RopePieceBTree();
    168 
    169     typedef RopePieceBTreeIterator iterator;
    170     iterator begin() const { return iterator(Root); }
    171     iterator end() const { return iterator(); }
    172     unsigned size() const;
    173     unsigned empty() const { return size() == 0; }
    174 
    175     void clear();
    176 
    177     void insert(unsigned Offset, const RopePiece &R);
    178 
    179     void erase(unsigned Offset, unsigned NumBytes);
    180   };
    181 
    182   //===--------------------------------------------------------------------===//
    183   // RewriteRope Class
    184   //===--------------------------------------------------------------------===//
    185 
    186 /// RewriteRope - A powerful string class.  This class supports extremely
    187 /// efficient insertions and deletions into the middle of it, even for
    188 /// ridiculously long strings.
    189 class RewriteRope {
    190   RopePieceBTree Chunks;
    191 
    192   /// We allocate space for string data out of a buffer of size AllocChunkSize.
    193   /// This keeps track of how much space is left.
    194   RopeRefCountString *AllocBuffer;
    195   unsigned AllocOffs;
    196   enum { AllocChunkSize = 4080 };
    197 
    198 public:
    199   RewriteRope() :  AllocBuffer(nullptr), AllocOffs(AllocChunkSize) {}
    200   RewriteRope(const RewriteRope &RHS)
    201     : Chunks(RHS.Chunks), AllocBuffer(nullptr), AllocOffs(AllocChunkSize) {
    202   }
    203 
    204   ~RewriteRope() {
    205     // If we had an allocation buffer, drop our reference to it.
    206     if (AllocBuffer)
    207       AllocBuffer->dropRef();
    208   }
    209 
    210   typedef RopePieceBTree::iterator iterator;
    211   typedef RopePieceBTree::iterator const_iterator;
    212   iterator begin() const { return Chunks.begin(); }
    213   iterator end() const  { return Chunks.end(); }
    214   unsigned size() const { return Chunks.size(); }
    215 
    216   void clear() {
    217     Chunks.clear();
    218   }
    219 
    220   void assign(const char *Start, const char *End) {
    221     clear();
    222     if (Start != End)
    223       Chunks.insert(0, MakeRopeString(Start, End));
    224   }
    225 
    226   void insert(unsigned Offset, const char *Start, const char *End) {
    227     assert(Offset <= size() && "Invalid position to insert!");
    228     if (Start == End) return;
    229     Chunks.insert(Offset, MakeRopeString(Start, End));
    230   }
    231 
    232   void erase(unsigned Offset, unsigned NumBytes) {
    233     assert(Offset+NumBytes <= size() && "Invalid region to erase!");
    234     if (NumBytes == 0) return;
    235     Chunks.erase(Offset, NumBytes);
    236   }
    237 
    238 private:
    239   RopePiece MakeRopeString(const char *Start, const char *End);
    240 };
    241 
    242 } // end namespace clang
    243 
    244 #endif
    245