1 /* ANTLRTokenBuffer.cpp 2 * 3 * SOFTWARE RIGHTS 4 * 5 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 6 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 7 * company may do whatever they wish with source code distributed with 8 * PCCTS or the code generated by PCCTS, including the incorporation of 9 * PCCTS, or its output, into commerical software. 10 * 11 * We encourage users to develop software with PCCTS. However, we do ask 12 * that credit is given to us for developing PCCTS. By "credit", 13 * we mean that if you incorporate our source code into one of your 14 * programs (commercial product, research project, or otherwise) that you 15 * acknowledge this fact somewhere in the documentation, research report, 16 * etc... If you like PCCTS and have developed a nice tool with the 17 * output, please mention that you developed it using PCCTS. In 18 * addition, we ask that this header remain intact in our source code. 19 * As long as these guidelines are kept, we expect to continue enhancing 20 * this system and expect to make other tools available as they are 21 * completed. 22 * 23 * ANTLR 1.33 24 * Terence Parr 25 * Parr Research Corporation 26 * with Purdue University and AHPCRC, University of Minnesota 27 * 1989-2000 28 */ 29 30 typedef int ANTLRTokenType; // fool AToken.h into compiling 31 32 class ANTLRParser; /* MR1 */ 33 34 #define ANTLR_SUPPORT_CODE 35 36 #include "pcctscfg.h" 37 38 #include ATOKENBUFFER_H 39 #include APARSER_H // MR23 40 41 typedef ANTLRAbstractToken *_ANTLRTokenPtr; 42 43 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 44 static unsigned char test[1000]; 45 #endif 46 47 #ifdef DBG_REFCOUNTTOKEN 48 int ANTLRRefCountToken::ctor = 0; /* MR23 */ 49 int ANTLRRefCountToken::dtor = 0; /* MR23 */ 50 #endif 51 52 ANTLRTokenBuffer:: 53 ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _chunk_size_formal) /* MR14 */ 54 { 55 this->input = _input; 56 this->k = _k; 57 buffer_size = chunk_size = _chunk_size_formal; 58 buffer = (_ANTLRTokenPtr *) 59 calloc(chunk_size+1,sizeof(_ANTLRTokenPtr )); 60 if ( buffer == NULL ) { 61 panic("cannot alloc token buffer"); 62 } 63 buffer++; // leave the first elem empty so tp-1 is valid ptr 64 65 tp = &buffer[0]; 66 last = tp-1; 67 next = &buffer[0]; 68 num_markers = 0; 69 end_of_buffer = &buffer[buffer_size-1]; 70 threshold = &buffer[(int)(buffer_size/2)]; // MR23 - Used to be 1.0/2.0 ! 71 _deleteTokens = 1; // assume we delete tokens 72 parser=NULL; // MR5 - uninitialized reference 73 } 74 75 static void f() {;} 76 ANTLRTokenBuffer:: 77 ~ANTLRTokenBuffer() 78 { 79 f(); 80 // Delete all remaining tokens (from 0..last inclusive) 81 if ( _deleteTokens ) 82 { 83 _ANTLRTokenPtr *z; 84 for (z=buffer; z<=last; z++) 85 { 86 (*z)->deref(); 87 // z->deref(); 88 #ifdef DBG_REFCOUNTTOKEN 89 /* MR23 */ printMessage(stderr, "##########dtor: deleting token '%s' (ref %d)\n", 90 ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); 91 #endif 92 if ( (*z)->nref()==0 ) 93 { 94 delete (*z); 95 } 96 } 97 } 98 99 if ( buffer!=NULL ) free((char *)(buffer-1)); 100 } 101 102 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 103 #include "pccts_stdio.h" 104 PCCTS_NAMESPACE_STD 105 #endif 106 107 _ANTLRTokenPtr ANTLRTokenBuffer:: 108 getToken() 109 { 110 if ( tp <= last ) // is there any buffered lookahead still to be read? 111 { 112 return *tp++; // read buffered lookahead 113 } 114 // out of buffered lookahead, get some more "real" 115 // input from getANTLRToken() 116 if ( num_markers==0 ) 117 { 118 if( next > threshold ) 119 { 120 #ifdef DBG_TBUF 121 /* MR23 */ printMessage(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer); 122 #endif 123 makeRoom(); 124 } 125 } 126 else { 127 if ( next > end_of_buffer ) 128 { 129 #ifdef DBG_TBUF 130 /* MR23 */ printMessage(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size); 131 #endif 132 extendBuffer(); 133 } 134 } 135 *next = getANTLRToken(); 136 (*next)->ref(); // say we have a copy of this pointer in buffer 137 last = next; 138 next++; 139 tp = last; 140 return *tp++; 141 } 142 143 void ANTLRTokenBuffer:: 144 rewind(int pos) 145 { 146 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 147 /* MR23 */ printMessage(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]); 148 test[pos]--; 149 #endif 150 tp = &buffer[pos]; 151 num_markers--; 152 } 153 154 /* 155 * This function is used to specify that the token pointers read 156 * by the ANTLRTokenBuffer should be buffered up (to be reused later). 157 */ 158 int ANTLRTokenBuffer:: 159 mark() 160 { 161 #if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) 162 test[tp-buffer]++; 163 /* MR23 */ printMessage(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]); 164 #endif 165 num_markers++; 166 return tp - buffer; 167 } 168 169 /* 170 * returns the token pointer n positions ahead. 171 * This implies that bufferedToken(1) gets the NEXT symbol of lookahead. 172 * This is used in conjunction with the ANTLRParser lookahead buffer. 173 * 174 * No markers are set or anything. A bunch of input is buffered--that's all. 175 * The tp pointer is left alone as the lookahead has not been advanced 176 * with getToken(). The next call to getToken() will find a token 177 * in the buffer and won't have to call getANTLRToken(). 178 * 179 * If this is called before a consume() is done, how_many_more_i_need is 180 * set to 'n'. 181 */ 182 _ANTLRTokenPtr ANTLRTokenBuffer:: 183 bufferedToken(int n) 184 { 185 // int how_many_more_i_need = (last-tp < 0) ? n : n-(last-tp)-1; 186 int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1; 187 // Make sure that at least n tokens are available in the buffer 188 #ifdef DBG_TBUF 189 /* MR23 */ printMessage(stderr, "bufferedToken(%d)\n", n); 190 #endif 191 for (int i=1; i<=how_many_more_i_need; i++) 192 { 193 if ( next > end_of_buffer ) // buffer overflow? 194 { 195 extendBuffer(); 196 } 197 *next = getANTLRToken(); 198 (*next)->ref(); // say we have a copy of this pointer in buffer 199 last = next; 200 next++; 201 } 202 return tp[n - 1]; 203 } 204 205 /* If no markers are set, the none of the input needs to be saved (except 206 * for the lookahead Token pointers). We save only k-1 token pointers as 207 * we are guaranteed to do a getANTLRToken() right after this because otherwise 208 * we wouldn't have needed to extend the buffer. 209 * 210 * If there are markers in the buffer, we need to save things and so 211 * extendBuffer() is called. 212 */ 213 void ANTLRTokenBuffer:: 214 makeRoom() 215 { 216 #ifdef DBG_TBUF 217 /* MR23 */ printMessage(stderr, "in makeRoom.................\n"); 218 /* MR23 */ printMessage(stderr, "num_markers==%d\n", num_markers); 219 #endif 220 /* 221 if ( num_markers == 0 ) 222 { 223 */ 224 #ifdef DBG_TBUF 225 /* MR23 */ printMessage(stderr, "moving lookahead and resetting next\n"); 226 227 _ANTLRTokenPtr *r; 228 /* MR23 */ printMessage(stderr, "tbuf = ["); 229 for (r=buffer; r<=last; r++) 230 { 231 if ( *r==NULL ) /* MR23 */ printMessage(stderr, " xxx"); 232 else /* MR23 */ printMessage(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText()); 233 } 234 /* MR23 */ printMessage(stderr, " ]\n"); 235 236 /* MR23 */ printMessage(stderr, 237 "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer); 238 #endif 239 240 // Delete all tokens from 0..last-(k-1) inclusive 241 if ( _deleteTokens ) 242 { 243 _ANTLRTokenPtr *z; 244 for (z=buffer; z<=last-(k-1); z++) 245 { 246 (*z)->deref(); 247 // z->deref(); 248 #ifdef DBG_REFCOUNTTOKEN 249 /* MR23 */ printMessage(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n", 250 ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); 251 #endif 252 if ( (*z)->nref()==0 ) 253 { 254 delete (*z); 255 } 256 } 257 } 258 259 // reset the buffer to initial conditions, but move k-1 symbols 260 // to the beginning of buffer and put new input symbol at k 261 _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1; 262 // ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1; 263 #ifdef DBG_TBUF 264 /* MR23 */ printMessage(stderr, "lookahead buffer = ["); 265 #endif 266 for (int i=1; i<=(k-1); i++) 267 { 268 *p++ = *q++; 269 #ifdef DBG_TBUF 270 /* MR23 */ printMessage(stderr, 271 " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText()); 272 #endif 273 } 274 #ifdef DBG_TBUF 275 /* MR23 */ printMessage(stderr, " ]\n"); 276 #endif 277 next = &buffer[k-1]; 278 tp = &buffer[k-1]; // tp points to what will be filled in next 279 last = tp-1; 280 #ifdef DBG_TBUF 281 /* MR23 */ printMessage(stderr, 282 "after: tp=%d, last=%d, next=%d\n", 283 tp-buffer, last-buffer, next-buffer); 284 #endif 285 /* 286 } 287 else { 288 extendBuffer(); 289 } 290 */ 291 } 292 293 /* This function extends 'buffer' by chunk_size and returns with all 294 * pointers at the same relative positions in the buffer (the buffer base 295 * address could have changed in realloc()) except that 'next' comes 296 * back set to where the next token should be stored. All other pointers 297 * are untouched. 298 */ 299 void 300 ANTLRTokenBuffer:: 301 extendBuffer() 302 { 303 int save_last = last-buffer, save_tp = tp-buffer, save_next = next-buffer; 304 #ifdef DBG_TBUF 305 /* MR23 */ printMessage(stderr, "extending physical buffer\n"); 306 #endif 307 buffer_size += chunk_size; 308 buffer = (_ANTLRTokenPtr *) 309 realloc((char *)(buffer-1), 310 (buffer_size+1)*sizeof(_ANTLRTokenPtr )); 311 if ( buffer == NULL ) { 312 panic("cannot alloc token buffer"); 313 } 314 buffer++; // leave the first elem empty so tp-1 is valid ptr 315 316 tp = buffer + save_tp; // put the pointers back to same relative position 317 last = buffer + save_last; 318 next = buffer + save_next; 319 end_of_buffer = &buffer[buffer_size-1]; 320 threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; 321 322 /* 323 // zero out new token ptrs so we'll know if something to delete in buffer 324 ANTLRAbstractToken **p = end_of_buffer-chunk_size+1; 325 for (; p<=end_of_buffer; p++) *p = NULL; 326 */ 327 } 328 329 ANTLRParser * ANTLRTokenBuffer:: // MR1 330 setParser(ANTLRParser *p) { // MR1 331 ANTLRParser *old=parser; // MR1 332 parser=p; // MR1 333 input->setParser(p); // MR1 334 return old; // MR1 335 } // MR1 336 // MR1 337 ANTLRParser * ANTLRTokenBuffer:: // MR1 338 getParser() { // MR1 339 return parser; // MR1 340 } // MR1 341 342 void ANTLRTokenBuffer::panic(const char *msg) // MR23 343 { 344 if (parser) //MR23 345 parser->panic(msg); //MR23 346 else //MR23 347 exit(PCCTS_EXIT_FAILURE); 348 } 349 350 //MR23 351 int ANTLRTokenBuffer::printMessage(FILE* pFile, const char* pFormat, ...) 352 { 353 va_list marker; 354 va_start( marker, pFormat ); 355 356 int iRet = 0; 357 if (parser) 358 parser->printMessageV(pFile, pFormat, marker); 359 else 360 iRet = vfprintf(pFile, pFormat, marker); 361 362 va_end( marker ); 363 return iRet; 364 } 365 366 /* to avoid having to link in another file just for the smart token ptr 367 * stuff, we include it here. Ugh. 368 * 369 * MR23 This causes nothing but problems for IDEs. 370 * Change from .cpp to .h 371 * 372 */ 373 374 #include ATOKPTR_IMPL_H 375