Home | History | Annotate | Download | only in h
      1 /* ANTLRTokenBuffer.C
      2  *
      3  * SOFTWARE RIGHTS
      4  *
      5  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
      6  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
      7  * company may do whatever they wish with source code distributed with
      8  * PCCTS or the code generated by PCCTS, including the incorporation of
      9  * PCCTS, or its output, into commerical software.
     10  *
     11  * We encourage users to develop software with PCCTS.  However, we do ask
     12  * that credit is given to us for developing PCCTS.  By "credit",
     13  * we mean that if you incorporate our source code into one of your
     14  * programs (commercial product, research project, or otherwise) that you
     15  * acknowledge this fact somewhere in the documentation, research report,
     16  * etc...  If you like PCCTS and have developed a nice tool with the
     17  * output, please mention that you developed it using PCCTS.  In
     18  * addition, we ask that this header remain intact in our source code.
     19  * As long as these guidelines are kept, we expect to continue enhancing
     20  * this system and expect to make other tools available as they are
     21  * completed.
     22  *
     23  * ANTLR 1.33
     24  * Terence Parr
     25  * Parr Research Corporation
     26  * with Purdue University and AHPCRC, University of Minnesota
     27  * 1989-1998
     28  */
     29 
     30 typedef int ANTLRTokenType;  // fool AToken.h into compiling
     31 
     32 class ANTLRParser;          /* MR1 */
     33 
     34 #define ANTLR_SUPPORT_CODE
     35 
     36 #include "pcctscfg.h"
     37 
     38 #include ATOKENBUFFER_H
     39 typedef ANTLRAbstractToken *_ANTLRTokenPtr;
     40 
     41 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
     42 static unsigned char test[1000];
     43 #endif
     44 
     45 #ifdef DBG_REFCOUNTTOKEN
     46 int ANTLRCommonToken::ctor = 0;
     47 int ANTLRCommonToken::dtor = 0;
     48 #endif
     49 
     50 ANTLRTokenBuffer::
     51 ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _chunk_size_formal) /* MR14 */
     52 {
     53   this->input = _input;
     54   this->k = _k;
     55   buffer_size = chunk_size = _chunk_size_formal;
     56   buffer = (_ANTLRTokenPtr *)
     57        calloc(chunk_size+1,sizeof(_ANTLRTokenPtr ));
     58   if ( buffer == NULL ) {
     59     panic("cannot alloc token buffer");
     60   }
     61   buffer++;        // leave the first elem empty so tp-1 is valid ptr
     62 
     63   tp = &buffer[0];
     64   last = tp-1;
     65   next = &buffer[0];
     66   num_markers = 0;
     67   end_of_buffer = &buffer[buffer_size-1];
     68   // BUGBUG -- threshold = &buffer[(int)(buffer_size*(1.0/2.0))];
     69   threshold = &buffer[(int)(buffer_size / 2)];
     70   _deleteTokens = 1;   // assume we delete tokens
     71   parser=NULL;        // MR5 - uninitialized reference
     72 }
     73 
     74 static void f() {;}
     75 ANTLRTokenBuffer::
     76 ~ANTLRTokenBuffer()
     77 {
     78   f();
     79   // Delete all remaining tokens (from 0..last inclusive)
     80   if ( _deleteTokens )
     81   {
     82     _ANTLRTokenPtr *z;
     83     for (z=buffer; z<=last; z++)
     84     {
     85       (*z)->deref();
     86 //      z->deref();
     87 #ifdef DBG_REFCOUNTTOKEN
     88           fprintf(stderr, "##########dtor: deleting token '%s' (ref %d)\n",
     89               ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());
     90 #endif
     91       if ( (*z)->nref()==0 )
     92       {
     93         delete (*z);
     94       }
     95     }
     96   }
     97 
     98   if ( buffer!=NULL ) free((char *)(buffer-1));
     99 }
    100 
    101 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
    102 #include "pccts_stdio.h"
    103 PCCTS_NAMESPACE_STD
    104 #endif
    105 
    106 _ANTLRTokenPtr ANTLRTokenBuffer::
    107 getToken()
    108 {
    109   if ( tp <= last )  // is there any buffered lookahead still to be read?
    110   {
    111     return *tp++;  // read buffered lookahead
    112   }
    113   // out of buffered lookahead, get some more "real"
    114   // input from getANTLRToken()
    115   if ( num_markers==0 )
    116   {
    117     if( next > threshold )
    118     {
    119 #ifdef DBG_TBUF
    120 fprintf(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer);
    121 #endif
    122       makeRoom();
    123     }
    124   }
    125   else {
    126     if ( next > end_of_buffer )
    127     {
    128 #ifdef DBG_TBUF
    129 fprintf(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size);
    130 #endif
    131       extendBuffer();
    132     }
    133   }
    134   *next = getANTLRToken();
    135   (*next)->ref();        // say we have a copy of this pointer in buffer
    136   last = next;
    137   next++;
    138   tp = last;
    139   return *tp++;
    140 }
    141 
    142 void ANTLRTokenBuffer::
    143 rewind(int pos)
    144 {
    145 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
    146   fprintf(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]);
    147   test[pos]--;
    148 #endif
    149   tp = &buffer[pos];
    150   num_markers--;
    151 }
    152 
    153 /*
    154  * This function is used to specify that the token pointers read
    155  * by the ANTLRTokenBuffer should be buffered up (to be reused later).
    156  */
    157 int ANTLRTokenBuffer::
    158 mark()
    159 {
    160 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW)
    161   test[tp-buffer]++;
    162   fprintf(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]);
    163 #endif
    164   num_markers++;
    165   return tp - buffer;
    166 }
    167 
    168 /*
    169  * returns the token pointer n positions ahead.
    170  * This implies that bufferedToken(1) gets the NEXT symbol of lookahead.
    171  * This is used in conjunction with the ANTLRParser lookahead buffer.
    172  *
    173  * No markers are set or anything.  A bunch of input is buffered--that's all.
    174  * The tp pointer is left alone as the lookahead has not been advanced
    175  * with getToken().  The next call to getToken() will find a token
    176  * in the buffer and won't have to call getANTLRToken().
    177  *
    178  * If this is called before a consume() is done, how_many_more_i_need is
    179  * set to 'n'.
    180  */
    181 _ANTLRTokenPtr ANTLRTokenBuffer::
    182 bufferedToken(int n)
    183 {
    184 //  int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1;
    185   int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1;
    186   // Make sure that at least n tokens are available in the buffer
    187 #ifdef DBG_TBUF
    188   fprintf(stderr, "bufferedToken(%d)\n", n);
    189 #endif
    190   for (int i=1; i<=how_many_more_i_need; i++)
    191   {
    192     if ( next > end_of_buffer )  // buffer overflow?
    193     {
    194       extendBuffer();
    195     }
    196     *next = getANTLRToken();
    197     (*next)->ref();    // say we have a copy of this pointer in buffer
    198     last = next;
    199     next++;
    200   }
    201   return tp[n - 1];
    202 }
    203 
    204 /* If no markers are set, the none of the input needs to be saved (except
    205  * for the lookahead Token pointers).  We save only k-1 token pointers as
    206  * we are guaranteed to do a getANTLRToken() right after this because otherwise
    207  * we wouldn't have needed to extend the buffer.
    208  *
    209  * If there are markers in the buffer, we need to save things and so
    210  * extendBuffer() is called.
    211  */
    212 void ANTLRTokenBuffer::
    213 makeRoom()
    214 {
    215 #ifdef DBG_TBUF
    216   fprintf(stderr, "in makeRoom.................\n");
    217   fprintf(stderr, "num_markers==%d\n", num_markers);
    218 #endif
    219 /*
    220   if ( num_markers == 0 )
    221   {
    222 */
    223 #ifdef DBG_TBUF
    224     fprintf(stderr, "moving lookahead and resetting next\n");
    225 
    226     _ANTLRTokenPtr *r;
    227     fprintf(stderr, "tbuf = [");
    228     for (r=buffer; r<=last; r++)
    229     {
    230       if ( *r==NULL ) fprintf(stderr, " xxx");
    231       else fprintf(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText());
    232     }
    233     fprintf(stderr, " ]\n");
    234 
    235     fprintf(stderr,
    236     "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer);
    237 #endif
    238 
    239     // Delete all tokens from 0..last-(k-1) inclusive
    240     if ( _deleteTokens )
    241     {
    242       _ANTLRTokenPtr *z;
    243       for (z=buffer; z<=last-(k-1); z++)
    244       {
    245         (*z)->deref();
    246 //        z->deref();
    247 #ifdef DBG_REFCOUNTTOKEN
    248           fprintf(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n",
    249               ((ANTLRCommonToken *)*z)->getText(), (*z)->nref());
    250 #endif
    251         if ( (*z)->nref()==0 )
    252         {
    253           delete (*z);
    254         }
    255       }
    256     }
    257 
    258     // reset the buffer to initial conditions, but move k-1 symbols
    259     // to the beginning of buffer and put new input symbol at k
    260     _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1;
    261 //    ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1;
    262 #ifdef DBG_TBUF
    263     fprintf(stderr, "lookahead buffer = [");
    264 #endif
    265     for (int i=1; i<=(k-1); i++)
    266     {
    267       *p++ = *q++;
    268 #ifdef DBG_TBUF
    269       fprintf(stderr,
    270       " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText());
    271 #endif
    272     }
    273 #ifdef DBG_TBUF
    274     fprintf(stderr, " ]\n");
    275 #endif
    276     next = &buffer[k-1];
    277     tp = &buffer[k-1];  // tp points to what will be filled in next
    278     last = tp-1;
    279 #ifdef DBG_TBUF
    280     fprintf(stderr,
    281     "after: tp=%d, last=%d, next=%d\n",
    282     tp-buffer, last-buffer, next-buffer);
    283 #endif
    284 /*
    285   }
    286   else {
    287     extendBuffer();
    288   }
    289 */
    290 }
    291 
    292 /* This function extends 'buffer' by chunk_size and returns with all
    293  * pointers at the same relative positions in the buffer (the buffer base
    294  * address could have changed in realloc()) except that 'next' comes
    295  * back set to where the next token should be stored.  All other pointers
    296  * are untouched.
    297  */
    298 void
    299 ANTLRTokenBuffer::
    300 extendBuffer()
    301 {
    302   int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer;
    303 #ifdef DBG_TBUF
    304   fprintf(stderr, "extending physical buffer\n");
    305 #endif
    306   buffer_size += chunk_size;
    307   buffer = (_ANTLRTokenPtr *)
    308     realloc((char *)(buffer-1),
    309         (buffer_size+1)*sizeof(_ANTLRTokenPtr ));
    310   if ( buffer == NULL ) {
    311     panic("cannot alloc token buffer");
    312   }
    313   buffer++;        // leave the first elem empty so tp-1 is valid ptr
    314 
    315   tp = buffer + save_tp;  // put the pointers back to same relative position
    316   last = buffer + save_last;
    317   next = buffer + save_next;
    318   end_of_buffer = &buffer[buffer_size-1];
    319   // BUGBUG -- threshold = &buffer[(int)(buffer_size*(1.0/2.0))];
    320   threshold = &buffer[(int)(buffer_size / 2)];
    321 
    322 /*
    323   // zero out new token ptrs so we'll know if something to delete in buffer
    324   ANTLRAbstractToken **p = end_of_buffer-chunk_size+1;
    325   for (; p<=end_of_buffer; p++) *p = NULL;
    326 */
    327 }
    328 
    329 ANTLRParser * ANTLRTokenBuffer::        // MR1
    330 setParser(ANTLRParser *p) {          // MR1
    331   ANTLRParser  *old=parser;          // MR1
    332   parser=p;              // MR1
    333   input->setParser(p);            // MR1
    334   return old;              // MR1
    335 }                // MR1
    336                 // MR1
    337 ANTLRParser * ANTLRTokenBuffer::        // MR1
    338 getParser() {              // MR1
    339   return parser;            // MR1
    340 }                // MR1
    341 
    342 /* to avoid having to link in another file just for the smart token ptr
    343  * stuff, we include it here.  Ugh.
    344  */
    345 #include ATOKPTR_C
    346