1 /* 2 ** This file contains all sources (including headers) to the LEMON 3 ** LALR(1) parser generator. The sources have been combined into a 4 ** single file to make it easy to include LEMON in the source tree 5 ** and Makefile of another program. 6 ** 7 ** The author of this program disclaims copyright. 8 */ 9 #include <stdio.h> 10 #include <stdarg.h> 11 #include <string.h> 12 #include <ctype.h> 13 #include <stdlib.h> 14 #include <assert.h> 15 16 #ifndef __WIN32__ 17 # if defined(_WIN32) || defined(WIN32) 18 # define __WIN32__ 19 # endif 20 #endif 21 22 #ifdef __WIN32__ 23 #ifdef __cplusplus 24 extern "C" { 25 #endif 26 extern int access(const char *path, int mode); 27 #ifdef __cplusplus 28 } 29 #endif 30 #else 31 #include <unistd.h> 32 #endif 33 34 /* #define PRIVATE static */ 35 #define PRIVATE 36 37 #ifdef TEST 38 #define MAXRHS 5 /* Set low to exercise exception code */ 39 #else 40 #define MAXRHS 1000 41 #endif 42 43 static int showPrecedenceConflict = 0; 44 static const char **made_files = NULL; 45 static int made_files_count = 0; 46 static int successful_exit = 0; 47 static void LemonAtExit(void) 48 { 49 /* if we failed, delete (most) files we made, to unconfuse build tools. */ 50 int i; 51 for (i = 0; i < made_files_count; i++) { 52 if (!successful_exit) { 53 remove(made_files[i]); 54 } 55 } 56 free(made_files); 57 made_files_count = 0; 58 made_files = NULL; 59 } 60 61 static char *msort(char*,char**,int(*)(const char*,const char*)); 62 63 /* 64 ** Compilers are getting increasingly pedantic about type conversions 65 ** as C evolves ever closer to Ada.... To work around the latest problems 66 ** we have to define the following variant of strlen(). 67 */ 68 #define lemonStrlen(X) ((int)strlen(X)) 69 70 /* a few forward declarations... */ 71 struct rule; 72 struct lemon; 73 struct action; 74 75 static struct action *Action_new(void); 76 static struct action *Action_sort(struct action *); 77 78 /********** From the file "build.h" ************************************/ 79 void FindRulePrecedences(); 80 void FindFirstSets(); 81 void FindStates(); 82 void FindLinks(); 83 void FindFollowSets(); 84 void FindActions(); 85 86 /********* From the file "configlist.h" *********************************/ 87 void Configlist_init(void); 88 struct config *Configlist_add(struct rule *, int); 89 struct config *Configlist_addbasis(struct rule *, int); 90 void Configlist_closure(struct lemon *); 91 void Configlist_sort(void); 92 void Configlist_sortbasis(void); 93 struct config *Configlist_return(void); 94 struct config *Configlist_basis(void); 95 void Configlist_eat(struct config *); 96 void Configlist_reset(void); 97 98 /********* From the file "error.h" ***************************************/ 99 void ErrorMsg(const char *, int,const char *, ...); 100 101 /****** From the file "option.h" ******************************************/ 102 enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, 103 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; 104 struct s_options { 105 enum option_type type; 106 const char *label; 107 char *arg; 108 const char *message; 109 }; 110 int OptInit(char**,struct s_options*,FILE*); 111 int OptNArgs(void); 112 char *OptArg(int); 113 void OptErr(int); 114 void OptPrint(void); 115 116 /******** From the file "parse.h" *****************************************/ 117 void Parse(struct lemon *lemp); 118 119 /********* From the file "plink.h" ***************************************/ 120 struct plink *Plink_new(void); 121 void Plink_add(struct plink **, struct config *); 122 void Plink_copy(struct plink **, struct plink *); 123 void Plink_delete(struct plink *); 124 125 /********** From the file "report.h" *************************************/ 126 void Reprint(struct lemon *); 127 void ReportOutput(struct lemon *); 128 void ReportTable(struct lemon *, int); 129 void ReportHeader(struct lemon *); 130 void CompressTables(struct lemon *); 131 void ResortStates(struct lemon *); 132 133 /********** From the file "set.h" ****************************************/ 134 void SetSize(int); /* All sets will be of size N */ 135 char *SetNew(void); /* A new set for element 0..N */ 136 void SetFree(char*); /* Deallocate a set */ 137 138 char *SetNew(void); /* A new set for element 0..N */ 139 int SetAdd(char*,int); /* Add element to a set */ 140 int SetUnion(char *,char *); /* A <- A U B, thru element N */ 141 #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ 142 143 /********** From the file "struct.h" *************************************/ 144 /* 145 ** Principal data structures for the LEMON parser generator. 146 */ 147 148 typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; 149 150 /* Symbols (terminals and nonterminals) of the grammar are stored 151 ** in the following: */ 152 enum symbol_type { 153 TERMINAL, 154 NONTERMINAL, 155 MULTITERMINAL 156 }; 157 enum e_assoc { 158 LEFT, 159 RIGHT, 160 NONE, 161 UNK 162 }; 163 struct symbol { 164 const char *name; /* Name of the symbol */ 165 int index; /* Index number for this symbol */ 166 enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ 167 struct rule *rule; /* Linked list of rules of this (if an NT) */ 168 struct symbol *fallback; /* fallback token in case this token doesn't parse */ 169 int prec; /* Precedence if defined (-1 otherwise) */ 170 enum e_assoc assoc; /* Associativity if precedence is defined */ 171 char *firstset; /* First-set for all rules of this symbol */ 172 Boolean lambda; /* True if NT and can generate an empty string */ 173 int useCnt; /* Number of times used */ 174 char *destructor; /* Code which executes whenever this symbol is 175 ** popped from the stack during error processing */ 176 int destLineno; /* Line number for start of destructor */ 177 char *datatype; /* The data type of information held by this 178 ** object. Only used if type==NONTERMINAL */ 179 int dtnum; /* The data type number. In the parser, the value 180 ** stack is a union. The .yy%d element of this 181 ** union is the correct data type for this object */ 182 /* The following fields are used by MULTITERMINALs only */ 183 int nsubsym; /* Number of constituent symbols in the MULTI */ 184 struct symbol **subsym; /* Array of constituent symbols */ 185 }; 186 187 /* Each production rule in the grammar is stored in the following 188 ** structure. */ 189 struct rule { 190 struct symbol *lhs; /* Left-hand side of the rule */ 191 const char *lhsalias; /* Alias for the LHS (NULL if none) */ 192 int lhsStart; /* True if left-hand side is the start symbol */ 193 int ruleline; /* Line number for the rule */ 194 int nrhs; /* Number of RHS symbols */ 195 struct symbol **rhs; /* The RHS symbols */ 196 const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ 197 int line; /* Line number at which code begins */ 198 const char *code; /* The code executed when this rule is reduced */ 199 struct symbol *precsym; /* Precedence symbol for this rule */ 200 int index; /* An index number for this rule */ 201 Boolean canReduce; /* True if this rule is ever reduced */ 202 struct rule *nextlhs; /* Next rule with the same LHS */ 203 struct rule *next; /* Next rule in the global list */ 204 }; 205 206 /* A configuration is a production rule of the grammar together with 207 ** a mark (dot) showing how much of that rule has been processed so far. 208 ** Configurations also contain a follow-set which is a list of terminal 209 ** symbols which are allowed to immediately follow the end of the rule. 210 ** Every configuration is recorded as an instance of the following: */ 211 enum cfgstatus { 212 COMPLETE, 213 INCOMPLETE 214 }; 215 struct config { 216 struct rule *rp; /* The rule upon which the configuration is based */ 217 int dot; /* The parse point */ 218 char *fws; /* Follow-set for this configuration only */ 219 struct plink *fplp; /* Follow-set forward propagation links */ 220 struct plink *bplp; /* Follow-set backwards propagation links */ 221 struct state *stp; /* Pointer to state which contains this */ 222 enum cfgstatus status; /* used during followset and shift computations */ 223 struct config *next; /* Next configuration in the state */ 224 struct config *bp; /* The next basis configuration */ 225 }; 226 227 enum e_action { 228 SHIFT, 229 ACCEPT, 230 REDUCE, 231 ERROR, 232 SSCONFLICT, /* A shift/shift conflict */ 233 SRCONFLICT, /* Was a reduce, but part of a conflict */ 234 RRCONFLICT, /* Was a reduce, but part of a conflict */ 235 SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ 236 RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ 237 NOT_USED /* Deleted by compression */ 238 }; 239 240 /* Every shift or reduce operation is stored as one of the following */ 241 struct action { 242 struct symbol *sp; /* The look-ahead symbol */ 243 enum e_action type; 244 union { 245 struct state *stp; /* The new state, if a shift */ 246 struct rule *rp; /* The rule, if a reduce */ 247 } x; 248 struct action *next; /* Next action for this state */ 249 struct action *collide; /* Next action with the same hash */ 250 }; 251 252 /* Each state of the generated parser's finite state machine 253 ** is encoded as an instance of the following structure. */ 254 struct state { 255 struct config *bp; /* The basis configurations for this state */ 256 struct config *cfp; /* All configurations in this set */ 257 int statenum; /* Sequential number for this state */ 258 struct action *ap; /* Array of actions for this state */ 259 int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ 260 int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ 261 int iDflt; /* Default action */ 262 }; 263 #define NO_OFFSET (-2147483647) 264 265 /* A followset propagation link indicates that the contents of one 266 ** configuration followset should be propagated to another whenever 267 ** the first changes. */ 268 struct plink { 269 struct config *cfp; /* The configuration to which linked */ 270 struct plink *next; /* The next propagate link */ 271 }; 272 273 /* The state vector for the entire parser generator is recorded as 274 ** follows. (LEMON uses no global variables and makes little use of 275 ** static variables. Fields in the following structure can be thought 276 ** of as begin global variables in the program.) */ 277 struct lemon { 278 struct state **sorted; /* Table of states sorted by state number */ 279 struct rule *rule; /* List of all rules */ 280 int nstate; /* Number of states */ 281 int nrule; /* Number of rules */ 282 int nsymbol; /* Number of terminal and nonterminal symbols */ 283 int nterminal; /* Number of terminal symbols */ 284 struct symbol **symbols; /* Sorted array of pointers to symbols */ 285 int errorcnt; /* Number of errors */ 286 struct symbol *errsym; /* The error symbol */ 287 struct symbol *wildcard; /* Token that matches anything */ 288 char *name; /* Name of the generated parser */ 289 char *arg; /* Declaration of the 3th argument to parser */ 290 char *tokentype; /* Type of terminal symbols in the parser stack */ 291 char *vartype; /* The default type of non-terminal symbols */ 292 char *start; /* Name of the start symbol for the grammar */ 293 char *stacksize; /* Size of the parser stack */ 294 char *include; /* Code to put at the start of the C file */ 295 char *error; /* Code to execute when an error is seen */ 296 char *overflow; /* Code to execute on a stack overflow */ 297 char *failure; /* Code to execute on parser failure */ 298 char *accept; /* Code to execute when the parser excepts */ 299 char *extracode; /* Code appended to the generated file */ 300 char *tokendest; /* Code to execute to destroy token data */ 301 char *vardest; /* Code for the default non-terminal destructor */ 302 char *filename; /* Name of the input file */ 303 char *outname; /* Name of the current output file */ 304 char *tokenprefix; /* A prefix added to token names in the .h file */ 305 int nconflict; /* Number of parsing conflicts */ 306 int tablesize; /* Size of the parse tables */ 307 int basisflag; /* Print only basis configurations */ 308 int has_fallback; /* True if any %fallback is seen in the grammar */ 309 int nolinenosflag; /* True if #line statements should not be printed */ 310 char *argv0; /* Name of the program */ 311 }; 312 313 #define MemoryCheck(X) if((X)==0){ \ 314 extern void memory_error(); \ 315 memory_error(); \ 316 } 317 318 /**************** From the file "table.h" *********************************/ 319 /* 320 ** All code in this file has been automatically generated 321 ** from a specification in the file 322 ** "table.q" 323 ** by the associative array code building program "aagen". 324 ** Do not edit this file! Instead, edit the specification 325 ** file, then rerun aagen. 326 */ 327 /* 328 ** Code for processing tables in the LEMON parser generator. 329 */ 330 /* Routines for handling a strings */ 331 332 const char *Strsafe(const char *); 333 334 void Strsafe_init(void); 335 int Strsafe_insert(const char *); 336 const char *Strsafe_find(const char *); 337 338 /* Routines for handling symbols of the grammar */ 339 340 struct symbol *Symbol_new(const char *); 341 int Symbolcmpp(const void *, const void *); 342 void Symbol_init(void); 343 int Symbol_insert(struct symbol *, const char *); 344 struct symbol *Symbol_find(const char *); 345 struct symbol *Symbol_Nth(int); 346 int Symbol_count(void); 347 struct symbol **Symbol_arrayof(void); 348 349 /* Routines to manage the state table */ 350 351 int Configcmp(const char *, const char *); 352 struct state *State_new(void); 353 void State_init(void); 354 int State_insert(struct state *, struct config *); 355 struct state *State_find(struct config *); 356 struct state **State_arrayof(/* */); 357 358 /* Routines used for efficiency in Configlist_add */ 359 360 void Configtable_init(void); 361 int Configtable_insert(struct config *); 362 struct config *Configtable_find(struct config *); 363 void Configtable_clear(int(*)(struct config *)); 364 365 /****************** From the file "action.c" *******************************/ 366 /* 367 ** Routines processing parser actions in the LEMON parser generator. 368 */ 369 370 /* Allocate a new parser action */ 371 static struct action *Action_new(void){ 372 static struct action *freelist = 0; 373 struct action *newaction; 374 375 if( freelist==0 ){ 376 int i; 377 int amt = 100; 378 freelist = (struct action *)calloc(amt, sizeof(struct action)); 379 if( freelist==0 ){ 380 fprintf(stderr,"Unable to allocate memory for a new parser action."); 381 exit(1); 382 } 383 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 384 freelist[amt-1].next = 0; 385 } 386 newaction = freelist; 387 freelist = freelist->next; 388 return newaction; 389 } 390 391 /* Compare two actions for sorting purposes. Return negative, zero, or 392 ** positive if the first action is less than, equal to, or greater than 393 ** the first 394 */ 395 static int actioncmp( 396 struct action *ap1, 397 struct action *ap2 398 ){ 399 int rc; 400 rc = ap1->sp->index - ap2->sp->index; 401 if( rc==0 ){ 402 rc = (int)ap1->type - (int)ap2->type; 403 } 404 if( rc==0 && ap1->type==REDUCE ){ 405 rc = ap1->x.rp->index - ap2->x.rp->index; 406 } 407 if( rc==0 ){ 408 rc = (int) (ap2 - ap1); 409 } 410 return rc; 411 } 412 413 /* Sort parser actions */ 414 static struct action *Action_sort( 415 struct action *ap 416 ){ 417 ap = (struct action *)msort((char *)ap,(char **)&ap->next, 418 (int(*)(const char*,const char*))actioncmp); 419 return ap; 420 } 421 422 void Action_add( 423 struct action **app, 424 enum e_action type, 425 struct symbol *sp, 426 char *arg 427 ){ 428 struct action *newaction; 429 newaction = Action_new(); 430 newaction->next = *app; 431 *app = newaction; 432 newaction->type = type; 433 newaction->sp = sp; 434 if( type==SHIFT ){ 435 newaction->x.stp = (struct state *)arg; 436 }else{ 437 newaction->x.rp = (struct rule *)arg; 438 } 439 } 440 /********************** New code to implement the "acttab" module ***********/ 441 /* 442 ** This module implements routines use to construct the yy_action[] table. 443 */ 444 445 /* 446 ** The state of the yy_action table under construction is an instance of 447 ** the following structure. 448 ** 449 ** The yy_action table maps the pair (state_number, lookahead) into an 450 ** action_number. The table is an array of integers pairs. The state_number 451 ** determines an initial offset into the yy_action array. The lookahead 452 ** value is then added to this initial offset to get an index X into the 453 ** yy_action array. If the aAction[X].lookahead equals the value of the 454 ** of the lookahead input, then the value of the action_number output is 455 ** aAction[X].action. If the lookaheads do not match then the 456 ** default action for the state_number is returned. 457 ** 458 ** All actions associated with a single state_number are first entered 459 ** into aLookahead[] using multiple calls to acttab_action(). Then the 460 ** actions for that single state_number are placed into the aAction[] 461 ** array with a single call to acttab_insert(). The acttab_insert() call 462 ** also resets the aLookahead[] array in preparation for the next 463 ** state number. 464 */ 465 struct lookahead_action { 466 int lookahead; /* Value of the lookahead token */ 467 int action; /* Action to take on the given lookahead */ 468 }; 469 typedef struct acttab acttab; 470 struct acttab { 471 int nAction; /* Number of used slots in aAction[] */ 472 int nActionAlloc; /* Slots allocated for aAction[] */ 473 struct lookahead_action 474 *aAction, /* The yy_action[] table under construction */ 475 *aLookahead; /* A single new transaction set */ 476 int mnLookahead; /* Minimum aLookahead[].lookahead */ 477 int mnAction; /* Action associated with mnLookahead */ 478 int mxLookahead; /* Maximum aLookahead[].lookahead */ 479 int nLookahead; /* Used slots in aLookahead[] */ 480 int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ 481 }; 482 483 /* Return the number of entries in the yy_action table */ 484 #define acttab_size(X) ((X)->nAction) 485 486 /* The value for the N-th entry in yy_action */ 487 #define acttab_yyaction(X,N) ((X)->aAction[N].action) 488 489 /* The value for the N-th entry in yy_lookahead */ 490 #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) 491 492 /* Free all memory associated with the given acttab */ 493 void acttab_free(acttab *p){ 494 free( p->aAction ); 495 free( p->aLookahead ); 496 free( p ); 497 } 498 499 /* Allocate a new acttab structure */ 500 acttab *acttab_alloc(void){ 501 acttab *p = (acttab *) calloc( 1, sizeof(*p) ); 502 if( p==0 ){ 503 fprintf(stderr,"Unable to allocate memory for a new acttab."); 504 exit(1); 505 } 506 memset(p, 0, sizeof(*p)); 507 return p; 508 } 509 510 /* Add a new action to the current transaction set. 511 ** 512 ** This routine is called once for each lookahead for a particular 513 ** state. 514 */ 515 void acttab_action(acttab *p, int lookahead, int action){ 516 if( p->nLookahead>=p->nLookaheadAlloc ){ 517 p->nLookaheadAlloc += 25; 518 p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, 519 sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); 520 if( p->aLookahead==0 ){ 521 fprintf(stderr,"malloc failed\n"); 522 exit(1); 523 } 524 } 525 if( p->nLookahead==0 ){ 526 p->mxLookahead = lookahead; 527 p->mnLookahead = lookahead; 528 p->mnAction = action; 529 }else{ 530 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; 531 if( p->mnLookahead>lookahead ){ 532 p->mnLookahead = lookahead; 533 p->mnAction = action; 534 } 535 } 536 p->aLookahead[p->nLookahead].lookahead = lookahead; 537 p->aLookahead[p->nLookahead].action = action; 538 p->nLookahead++; 539 } 540 541 /* 542 ** Add the transaction set built up with prior calls to acttab_action() 543 ** into the current action table. Then reset the transaction set back 544 ** to an empty set in preparation for a new round of acttab_action() calls. 545 ** 546 ** Return the offset into the action table of the new transaction. 547 */ 548 int acttab_insert(acttab *p){ 549 int i, j, k, n; 550 assert( p->nLookahead>0 ); 551 552 /* Make sure we have enough space to hold the expanded action table 553 ** in the worst case. The worst case occurs if the transaction set 554 ** must be appended to the current action table 555 */ 556 n = p->mxLookahead + 1; 557 if( p->nAction + n >= p->nActionAlloc ){ 558 int oldAlloc = p->nActionAlloc; 559 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; 560 p->aAction = (struct lookahead_action *) realloc( p->aAction, 561 sizeof(p->aAction[0])*p->nActionAlloc); 562 if( p->aAction==0 ){ 563 fprintf(stderr,"malloc failed\n"); 564 exit(1); 565 } 566 for(i=oldAlloc; i<p->nActionAlloc; i++){ 567 p->aAction[i].lookahead = -1; 568 p->aAction[i].action = -1; 569 } 570 } 571 572 /* Scan the existing action table looking for an offset that is a 573 ** duplicate of the current transaction set. Fall out of the loop 574 ** if and when the duplicate is found. 575 ** 576 ** i is the index in p->aAction[] where p->mnLookahead is inserted. 577 */ 578 for(i=p->nAction-1; i>=0; i--){ 579 if( p->aAction[i].lookahead==p->mnLookahead ){ 580 /* All lookaheads and actions in the aLookahead[] transaction 581 ** must match against the candidate aAction[i] entry. */ 582 if( p->aAction[i].action!=p->mnAction ) continue; 583 for(j=0; j<p->nLookahead; j++){ 584 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 585 if( k<0 || k>=p->nAction ) break; 586 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; 587 if( p->aLookahead[j].action!=p->aAction[k].action ) break; 588 } 589 if( j<p->nLookahead ) continue; 590 591 /* No possible lookahead value that is not in the aLookahead[] 592 ** transaction is allowed to match aAction[i] */ 593 n = 0; 594 for(j=0; j<p->nAction; j++){ 595 if( p->aAction[j].lookahead<0 ) continue; 596 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; 597 } 598 if( n==p->nLookahead ){ 599 break; /* An exact match is found at offset i */ 600 } 601 } 602 } 603 604 /* If no existing offsets exactly match the current transaction, find an 605 ** an empty offset in the aAction[] table in which we can add the 606 ** aLookahead[] transaction. 607 */ 608 if( i<0 ){ 609 /* Look for holes in the aAction[] table that fit the current 610 ** aLookahead[] transaction. Leave i set to the offset of the hole. 611 ** If no holes are found, i is left at p->nAction, which means the 612 ** transaction will be appended. */ 613 for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){ 614 if( p->aAction[i].lookahead<0 ){ 615 for(j=0; j<p->nLookahead; j++){ 616 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 617 if( k<0 ) break; 618 if( p->aAction[k].lookahead>=0 ) break; 619 } 620 if( j<p->nLookahead ) continue; 621 for(j=0; j<p->nAction; j++){ 622 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; 623 } 624 if( j==p->nAction ){ 625 break; /* Fits in empty slots */ 626 } 627 } 628 } 629 } 630 /* Insert transaction set at index i. */ 631 for(j=0; j<p->nLookahead; j++){ 632 k = p->aLookahead[j].lookahead - p->mnLookahead + i; 633 p->aAction[k] = p->aLookahead[j]; 634 if( k>=p->nAction ) p->nAction = k+1; 635 } 636 p->nLookahead = 0; 637 638 /* Return the offset that is added to the lookahead in order to get the 639 ** index into yy_action of the action */ 640 return i - p->mnLookahead; 641 } 642 643 /********************** From the file "build.c" *****************************/ 644 /* 645 ** Routines to construction the finite state machine for the LEMON 646 ** parser generator. 647 */ 648 649 /* Find a precedence symbol of every rule in the grammar. 650 ** 651 ** Those rules which have a precedence symbol coded in the input 652 ** grammar using the "[symbol]" construct will already have the 653 ** rp->precsym field filled. Other rules take as their precedence 654 ** symbol the first RHS symbol with a defined precedence. If there 655 ** are not RHS symbols with a defined precedence, the precedence 656 ** symbol field is left blank. 657 */ 658 void FindRulePrecedences(struct lemon *xp) 659 { 660 struct rule *rp; 661 for(rp=xp->rule; rp; rp=rp->next){ 662 if( rp->precsym==0 ){ 663 int i, j; 664 for(i=0; i<rp->nrhs && rp->precsym==0; i++){ 665 struct symbol *sp = rp->rhs[i]; 666 if( sp->type==MULTITERMINAL ){ 667 for(j=0; j<sp->nsubsym; j++){ 668 if( sp->subsym[j]->prec>=0 ){ 669 rp->precsym = sp->subsym[j]; 670 break; 671 } 672 } 673 }else if( sp->prec>=0 ){ 674 rp->precsym = rp->rhs[i]; 675 } 676 } 677 } 678 } 679 return; 680 } 681 682 /* Find all nonterminals which will generate the empty string. 683 ** Then go back and compute the first sets of every nonterminal. 684 ** The first set is the set of all terminal symbols which can begin 685 ** a string generated by that nonterminal. 686 */ 687 void FindFirstSets(struct lemon *lemp) 688 { 689 int i, j; 690 struct rule *rp; 691 int progress; 692 693 for(i=0; i<lemp->nsymbol; i++){ 694 lemp->symbols[i]->lambda = LEMON_FALSE; 695 } 696 for(i=lemp->nterminal; i<lemp->nsymbol; i++){ 697 lemp->symbols[i]->firstset = SetNew(); 698 } 699 700 /* First compute all lambdas */ 701 do{ 702 progress = 0; 703 for(rp=lemp->rule; rp; rp=rp->next){ 704 if( rp->lhs->lambda ) continue; 705 for(i=0; i<rp->nrhs; i++){ 706 struct symbol *sp = rp->rhs[i]; 707 if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break; 708 } 709 if( i==rp->nrhs ){ 710 rp->lhs->lambda = LEMON_TRUE; 711 progress = 1; 712 } 713 } 714 }while( progress ); 715 716 /* Now compute all first sets */ 717 do{ 718 struct symbol *s1, *s2; 719 progress = 0; 720 for(rp=lemp->rule; rp; rp=rp->next){ 721 s1 = rp->lhs; 722 for(i=0; i<rp->nrhs; i++){ 723 s2 = rp->rhs[i]; 724 if( s2->type==TERMINAL ){ 725 progress += SetAdd(s1->firstset,s2->index); 726 break; 727 }else if( s2->type==MULTITERMINAL ){ 728 for(j=0; j<s2->nsubsym; j++){ 729 progress += SetAdd(s1->firstset,s2->subsym[j]->index); 730 } 731 break; 732 }else if( s1==s2 ){ 733 if( s1->lambda==LEMON_FALSE ) break; 734 }else{ 735 progress += SetUnion(s1->firstset,s2->firstset); 736 if( s2->lambda==LEMON_FALSE ) break; 737 } 738 } 739 } 740 }while( progress ); 741 return; 742 } 743 744 /* Compute all LR(0) states for the grammar. Links 745 ** are added to between some states so that the LR(1) follow sets 746 ** can be computed later. 747 */ 748 PRIVATE struct state *getstate(struct lemon *); /* forward reference */ 749 void FindStates(struct lemon *lemp) 750 { 751 struct symbol *sp; 752 struct rule *rp; 753 754 Configlist_init(); 755 756 /* Find the start symbol */ 757 if( lemp->start ){ 758 sp = Symbol_find(lemp->start); 759 if( sp==0 ){ 760 ErrorMsg(lemp->filename,0, 761 "The specified start symbol \"%s\" is not \ 762 in a nonterminal of the grammar. \"%s\" will be used as the start \ 763 symbol instead.",lemp->start,lemp->rule->lhs->name); 764 lemp->errorcnt++; 765 sp = lemp->rule->lhs; 766 } 767 }else{ 768 sp = lemp->rule->lhs; 769 } 770 771 /* Make sure the start symbol doesn't occur on the right-hand side of 772 ** any rule. Report an error if it does. (YACC would generate a new 773 ** start symbol in this case.) */ 774 for(rp=lemp->rule; rp; rp=rp->next){ 775 int i; 776 for(i=0; i<rp->nrhs; i++){ 777 if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ 778 ErrorMsg(lemp->filename,0, 779 "The start symbol \"%s\" occurs on the \ 780 right-hand side of a rule. This will result in a parser which \ 781 does not work properly.",sp->name); 782 lemp->errorcnt++; 783 } 784 } 785 } 786 787 /* The basis configuration set for the first state 788 ** is all rules which have the start symbol as their 789 ** left-hand side */ 790 for(rp=sp->rule; rp; rp=rp->nextlhs){ 791 struct config *newcfp; 792 rp->lhsStart = 1; 793 newcfp = Configlist_addbasis(rp,0); 794 SetAdd(newcfp->fws,0); 795 } 796 797 /* Compute the first state. All other states will be 798 ** computed automatically during the computation of the first one. 799 ** The returned pointer to the first state is not used. */ 800 (void)getstate(lemp); 801 return; 802 } 803 804 /* Return a pointer to a state which is described by the configuration 805 ** list which has been built from calls to Configlist_add. 806 */ 807 PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ 808 PRIVATE struct state *getstate(struct lemon *lemp) 809 { 810 struct config *cfp, *bp; 811 struct state *stp; 812 813 /* Extract the sorted basis of the new state. The basis was constructed 814 ** by prior calls to "Configlist_addbasis()". */ 815 Configlist_sortbasis(); 816 bp = Configlist_basis(); 817 818 /* Get a state with the same basis */ 819 stp = State_find(bp); 820 if( stp ){ 821 /* A state with the same basis already exists! Copy all the follow-set 822 ** propagation links from the state under construction into the 823 ** preexisting state, then return a pointer to the preexisting state */ 824 struct config *x, *y; 825 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ 826 Plink_copy(&y->bplp,x->bplp); 827 Plink_delete(x->fplp); 828 x->fplp = x->bplp = 0; 829 } 830 cfp = Configlist_return(); 831 Configlist_eat(cfp); 832 }else{ 833 /* This really is a new state. Construct all the details */ 834 Configlist_closure(lemp); /* Compute the configuration closure */ 835 Configlist_sort(); /* Sort the configuration closure */ 836 cfp = Configlist_return(); /* Get a pointer to the config list */ 837 stp = State_new(); /* A new state structure */ 838 MemoryCheck(stp); 839 stp->bp = bp; /* Remember the configuration basis */ 840 stp->cfp = cfp; /* Remember the configuration closure */ 841 stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ 842 stp->ap = 0; /* No actions, yet. */ 843 State_insert(stp,stp->bp); /* Add to the state table */ 844 buildshifts(lemp,stp); /* Recursively compute successor states */ 845 } 846 return stp; 847 } 848 849 /* 850 ** Return true if two symbols are the same. 851 */ 852 int same_symbol(struct symbol *a, struct symbol *b) 853 { 854 int i; 855 if( a==b ) return 1; 856 if( a->type!=MULTITERMINAL ) return 0; 857 if( b->type!=MULTITERMINAL ) return 0; 858 if( a->nsubsym!=b->nsubsym ) return 0; 859 for(i=0; i<a->nsubsym; i++){ 860 if( a->subsym[i]!=b->subsym[i] ) return 0; 861 } 862 return 1; 863 } 864 865 /* Construct all successor states to the given state. A "successor" 866 ** state is any state which can be reached by a shift action. 867 */ 868 PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) 869 { 870 struct config *cfp; /* For looping thru the config closure of "stp" */ 871 struct config *bcfp; /* For the inner loop on config closure of "stp" */ 872 struct config *newcfg; /* */ 873 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ 874 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ 875 struct state *newstp; /* A pointer to a successor state */ 876 877 /* Each configuration becomes complete after it contibutes to a successor 878 ** state. Initially, all configurations are incomplete */ 879 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; 880 881 /* Loop through all configurations of the state "stp" */ 882 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 883 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ 884 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ 885 Configlist_reset(); /* Reset the new config set */ 886 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ 887 888 /* For every configuration in the state "stp" which has the symbol "sp" 889 ** following its dot, add the same configuration to the basis set under 890 ** construction but with the dot shifted one symbol to the right. */ 891 for(bcfp=cfp; bcfp; bcfp=bcfp->next){ 892 if( bcfp->status==COMPLETE ) continue; /* Already used */ 893 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ 894 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ 895 if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ 896 bcfp->status = COMPLETE; /* Mark this config as used */ 897 newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); 898 Plink_add(&newcfg->bplp,bcfp); 899 } 900 901 /* Get a pointer to the state described by the basis configuration set 902 ** constructed in the preceding loop */ 903 newstp = getstate(lemp); 904 905 /* The state "newstp" is reached from the state "stp" by a shift action 906 ** on the symbol "sp" */ 907 if( sp->type==MULTITERMINAL ){ 908 int i; 909 for(i=0; i<sp->nsubsym; i++){ 910 Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); 911 } 912 }else{ 913 Action_add(&stp->ap,SHIFT,sp,(char *)newstp); 914 } 915 } 916 } 917 918 /* 919 ** Construct the propagation links 920 */ 921 void FindLinks(struct lemon *lemp) 922 { 923 int i; 924 struct config *cfp, *other; 925 struct state *stp; 926 struct plink *plp; 927 928 /* Housekeeping detail: 929 ** Add to every propagate link a pointer back to the state to 930 ** which the link is attached. */ 931 for(i=0; i<lemp->nstate; i++){ 932 stp = lemp->sorted[i]; 933 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 934 cfp->stp = stp; 935 } 936 } 937 938 /* Convert all backlinks into forward links. Only the forward 939 ** links are used in the follow-set computation. */ 940 for(i=0; i<lemp->nstate; i++){ 941 stp = lemp->sorted[i]; 942 for(cfp=stp->cfp; cfp; cfp=cfp->next){ 943 for(plp=cfp->bplp; plp; plp=plp->next){ 944 other = plp->cfp; 945 Plink_add(&other->fplp,cfp); 946 } 947 } 948 } 949 } 950 951 /* Compute all followsets. 952 ** 953 ** A followset is the set of all symbols which can come immediately 954 ** after a configuration. 955 */ 956 void FindFollowSets(struct lemon *lemp) 957 { 958 int i; 959 struct config *cfp; 960 struct plink *plp; 961 int progress; 962 int change; 963 964 for(i=0; i<lemp->nstate; i++){ 965 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 966 cfp->status = INCOMPLETE; 967 } 968 } 969 970 do{ 971 progress = 0; 972 for(i=0; i<lemp->nstate; i++){ 973 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ 974 if( cfp->status==COMPLETE ) continue; 975 for(plp=cfp->fplp; plp; plp=plp->next){ 976 change = SetUnion(plp->cfp->fws,cfp->fws); 977 if( change ){ 978 plp->cfp->status = INCOMPLETE; 979 progress = 1; 980 } 981 } 982 cfp->status = COMPLETE; 983 } 984 } 985 }while( progress ); 986 } 987 988 static int resolve_conflict(struct action *,struct action *, struct symbol *); 989 990 /* Compute the reduce actions, and resolve conflicts. 991 */ 992 void FindActions(struct lemon *lemp) 993 { 994 int i,j; 995 struct config *cfp; 996 struct state *stp; 997 struct symbol *sp; 998 struct rule *rp; 999 1000 /* Add all of the reduce actions 1001 ** A reduce action is added for each element of the followset of 1002 ** a configuration which has its dot at the extreme right. 1003 */ 1004 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ 1005 stp = lemp->sorted[i]; 1006 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ 1007 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ 1008 for(j=0; j<lemp->nterminal; j++){ 1009 if( SetFind(cfp->fws,j) ){ 1010 /* Add a reduce action to the state "stp" which will reduce by the 1011 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ 1012 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); 1013 } 1014 } 1015 } 1016 } 1017 } 1018 1019 /* Add the accepting token */ 1020 if( lemp->start ){ 1021 sp = Symbol_find(lemp->start); 1022 if( sp==0 ) sp = lemp->rule->lhs; 1023 }else{ 1024 sp = lemp->rule->lhs; 1025 } 1026 /* Add to the first state (which is always the starting state of the 1027 ** finite state machine) an action to ACCEPT if the lookahead is the 1028 ** start nonterminal. */ 1029 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); 1030 1031 /* Resolve conflicts */ 1032 for(i=0; i<lemp->nstate; i++){ 1033 struct action *ap, *nap; 1034 struct state *stp; 1035 stp = lemp->sorted[i]; 1036 /* assert( stp->ap ); */ 1037 stp->ap = Action_sort(stp->ap); 1038 for(ap=stp->ap; ap && ap->next; ap=ap->next){ 1039 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ 1040 /* The two actions "ap" and "nap" have the same lookahead. 1041 ** Figure out which one should be used */ 1042 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); 1043 } 1044 } 1045 } 1046 1047 /* Report an error for each rule that can never be reduced. */ 1048 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; 1049 for(i=0; i<lemp->nstate; i++){ 1050 struct action *ap; 1051 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ 1052 if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; 1053 } 1054 } 1055 for(rp=lemp->rule; rp; rp=rp->next){ 1056 if( rp->canReduce ) continue; 1057 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); 1058 lemp->errorcnt++; 1059 } 1060 } 1061 1062 /* Resolve a conflict between the two given actions. If the 1063 ** conflict can't be resolved, return non-zero. 1064 ** 1065 ** NO LONGER TRUE: 1066 ** To resolve a conflict, first look to see if either action 1067 ** is on an error rule. In that case, take the action which 1068 ** is not associated with the error rule. If neither or both 1069 ** actions are associated with an error rule, then try to 1070 ** use precedence to resolve the conflict. 1071 ** 1072 ** If either action is a SHIFT, then it must be apx. This 1073 ** function won't work if apx->type==REDUCE and apy->type==SHIFT. 1074 */ 1075 static int resolve_conflict( 1076 struct action *apx, 1077 struct action *apy, 1078 struct symbol *errsym /* The error symbol (if defined. NULL otherwise) */ 1079 ){ 1080 struct symbol *spx, *spy; 1081 int errcnt = 0; 1082 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ 1083 if( apx->type==SHIFT && apy->type==SHIFT ){ 1084 apy->type = SSCONFLICT; 1085 errcnt++; 1086 } 1087 if( apx->type==SHIFT && apy->type==REDUCE ){ 1088 spx = apx->sp; 1089 spy = apy->x.rp->precsym; 1090 if( spy==0 || spx->prec<0 || spy->prec<0 ){ 1091 /* Not enough precedence information. */ 1092 apy->type = SRCONFLICT; 1093 errcnt++; 1094 }else if( spx->prec>spy->prec ){ /* higher precedence wins */ 1095 apy->type = RD_RESOLVED; 1096 }else if( spx->prec<spy->prec ){ 1097 apx->type = SH_RESOLVED; 1098 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ 1099 apy->type = RD_RESOLVED; /* associativity */ 1100 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ 1101 apx->type = SH_RESOLVED; 1102 }else{ 1103 assert( spx->prec==spy->prec && spx->assoc==NONE ); 1104 apy->type = SRCONFLICT; 1105 errcnt++; 1106 } 1107 }else if( apx->type==REDUCE && apy->type==REDUCE ){ 1108 spx = apx->x.rp->precsym; 1109 spy = apy->x.rp->precsym; 1110 if( spx==0 || spy==0 || spx->prec<0 || 1111 spy->prec<0 || spx->prec==spy->prec ){ 1112 apy->type = RRCONFLICT; 1113 errcnt++; 1114 }else if( spx->prec>spy->prec ){ 1115 apy->type = RD_RESOLVED; 1116 }else if( spx->prec<spy->prec ){ 1117 apx->type = RD_RESOLVED; 1118 } 1119 }else{ 1120 assert( 1121 apx->type==SH_RESOLVED || 1122 apx->type==RD_RESOLVED || 1123 apx->type==SSCONFLICT || 1124 apx->type==SRCONFLICT || 1125 apx->type==RRCONFLICT || 1126 apy->type==SH_RESOLVED || 1127 apy->type==RD_RESOLVED || 1128 apy->type==SSCONFLICT || 1129 apy->type==SRCONFLICT || 1130 apy->type==RRCONFLICT 1131 ); 1132 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before 1133 ** REDUCEs on the list. If we reach this point it must be because 1134 ** the parser conflict had already been resolved. */ 1135 } 1136 return errcnt; 1137 } 1138 /********************* From the file "configlist.c" *************************/ 1139 /* 1140 ** Routines to processing a configuration list and building a state 1141 ** in the LEMON parser generator. 1142 */ 1143 1144 static struct config *freelist = 0; /* List of free configurations */ 1145 static struct config *current = 0; /* Top of list of configurations */ 1146 static struct config **currentend = 0; /* Last on list of configs */ 1147 static struct config *basis = 0; /* Top of list of basis configs */ 1148 static struct config **basisend = 0; /* End of list of basis configs */ 1149 1150 /* Return a pointer to a new configuration */ 1151 PRIVATE struct config *newconfig(){ 1152 struct config *newcfg; 1153 if( freelist==0 ){ 1154 int i; 1155 int amt = 3; 1156 freelist = (struct config *)calloc( amt, sizeof(struct config) ); 1157 if( freelist==0 ){ 1158 fprintf(stderr,"Unable to allocate memory for a new configuration."); 1159 exit(1); 1160 } 1161 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; 1162 freelist[amt-1].next = 0; 1163 } 1164 newcfg = freelist; 1165 freelist = freelist->next; 1166 return newcfg; 1167 } 1168 1169 /* The configuration "old" is no longer used */ 1170 PRIVATE void deleteconfig(struct config *old) 1171 { 1172 old->next = freelist; 1173 freelist = old; 1174 } 1175 1176 /* Initialized the configuration list builder */ 1177 void Configlist_init(){ 1178 current = 0; 1179 currentend = ¤t; 1180 basis = 0; 1181 basisend = &basis; 1182 Configtable_init(); 1183 return; 1184 } 1185 1186 /* Initialized the configuration list builder */ 1187 void Configlist_reset(){ 1188 current = 0; 1189 currentend = ¤t; 1190 basis = 0; 1191 basisend = &basis; 1192 Configtable_clear(0); 1193 return; 1194 } 1195 1196 /* Add another configuration to the configuration list */ 1197 struct config *Configlist_add( 1198 struct rule *rp, /* The rule */ 1199 int dot /* Index into the RHS of the rule where the dot goes */ 1200 ){ 1201 struct config *cfp, model; 1202 1203 assert( currentend!=0 ); 1204 model.rp = rp; 1205 model.dot = dot; 1206 cfp = Configtable_find(&model); 1207 if( cfp==0 ){ 1208 cfp = newconfig(); 1209 cfp->rp = rp; 1210 cfp->dot = dot; 1211 cfp->fws = SetNew(); 1212 cfp->stp = 0; 1213 cfp->fplp = cfp->bplp = 0; 1214 cfp->next = 0; 1215 cfp->bp = 0; 1216 *currentend = cfp; 1217 currentend = &cfp->next; 1218 Configtable_insert(cfp); 1219 } 1220 return cfp; 1221 } 1222 1223 /* Add a basis configuration to the configuration list */ 1224 struct config *Configlist_addbasis(struct rule *rp, int dot) 1225 { 1226 struct config *cfp, model; 1227 1228 assert( basisend!=0 ); 1229 assert( currentend!=0 ); 1230 model.rp = rp; 1231 model.dot = dot; 1232 cfp = Configtable_find(&model); 1233 if( cfp==0 ){ 1234 cfp = newconfig(); 1235 cfp->rp = rp; 1236 cfp->dot = dot; 1237 cfp->fws = SetNew(); 1238 cfp->stp = 0; 1239 cfp->fplp = cfp->bplp = 0; 1240 cfp->next = 0; 1241 cfp->bp = 0; 1242 *currentend = cfp; 1243 currentend = &cfp->next; 1244 *basisend = cfp; 1245 basisend = &cfp->bp; 1246 Configtable_insert(cfp); 1247 } 1248 return cfp; 1249 } 1250 1251 /* Compute the closure of the configuration list */ 1252 void Configlist_closure(struct lemon *lemp) 1253 { 1254 struct config *cfp, *newcfp; 1255 struct rule *rp, *newrp; 1256 struct symbol *sp, *xsp; 1257 int i, dot; 1258 1259 assert( currentend!=0 ); 1260 for(cfp=current; cfp; cfp=cfp->next){ 1261 rp = cfp->rp; 1262 dot = cfp->dot; 1263 if( dot>=rp->nrhs ) continue; 1264 sp = rp->rhs[dot]; 1265 if( sp->type==NONTERMINAL ){ 1266 if( sp->rule==0 && sp!=lemp->errsym ){ 1267 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", 1268 sp->name); 1269 lemp->errorcnt++; 1270 } 1271 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ 1272 newcfp = Configlist_add(newrp,0); 1273 for(i=dot+1; i<rp->nrhs; i++){ 1274 xsp = rp->rhs[i]; 1275 if( xsp->type==TERMINAL ){ 1276 SetAdd(newcfp->fws,xsp->index); 1277 break; 1278 }else if( xsp->type==MULTITERMINAL ){ 1279 int k; 1280 for(k=0; k<xsp->nsubsym; k++){ 1281 SetAdd(newcfp->fws, xsp->subsym[k]->index); 1282 } 1283 break; 1284 }else{ 1285 SetUnion(newcfp->fws,xsp->firstset); 1286 if( xsp->lambda==LEMON_FALSE ) break; 1287 } 1288 } 1289 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); 1290 } 1291 } 1292 } 1293 return; 1294 } 1295 1296 /* Sort the configuration list */ 1297 void Configlist_sort(){ 1298 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); 1299 currentend = 0; 1300 return; 1301 } 1302 1303 /* Sort the basis configuration list */ 1304 void Configlist_sortbasis(){ 1305 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); 1306 basisend = 0; 1307 return; 1308 } 1309 1310 /* Return a pointer to the head of the configuration list and 1311 ** reset the list */ 1312 struct config *Configlist_return(){ 1313 struct config *old; 1314 old = current; 1315 current = 0; 1316 currentend = 0; 1317 return old; 1318 } 1319 1320 /* Return a pointer to the head of the configuration list and 1321 ** reset the list */ 1322 struct config *Configlist_basis(){ 1323 struct config *old; 1324 old = basis; 1325 basis = 0; 1326 basisend = 0; 1327 return old; 1328 } 1329 1330 /* Free all elements of the given configuration list */ 1331 void Configlist_eat(struct config *cfp) 1332 { 1333 struct config *nextcfp; 1334 for(; cfp; cfp=nextcfp){ 1335 nextcfp = cfp->next; 1336 assert( cfp->fplp==0 ); 1337 assert( cfp->bplp==0 ); 1338 if( cfp->fws ) SetFree(cfp->fws); 1339 deleteconfig(cfp); 1340 } 1341 return; 1342 } 1343 /***************** From the file "error.c" *********************************/ 1344 /* 1345 ** Code for printing error message. 1346 */ 1347 1348 void ErrorMsg(const char *filename, int lineno, const char *format, ...){ 1349 va_list ap; 1350 fprintf(stderr, "%s:%d: ", filename, lineno); 1351 va_start(ap, format); 1352 vfprintf(stderr,format,ap); 1353 va_end(ap); 1354 fprintf(stderr, "\n"); 1355 } 1356 /**************** From the file "main.c" ************************************/ 1357 /* 1358 ** Main program file for the LEMON parser generator. 1359 */ 1360 1361 /* Report an out-of-memory condition and abort. This function 1362 ** is used mostly by the "MemoryCheck" macro in struct.h 1363 */ 1364 void memory_error(){ 1365 fprintf(stderr,"Out of memory. Aborting...\n"); 1366 exit(1); 1367 } 1368 1369 static int nDefine = 0; /* Number of -D options on the command line */ 1370 static char **azDefine = 0; /* Name of the -D macros */ 1371 1372 /* This routine is called with the argument to each -D command-line option. 1373 ** Add the macro defined to the azDefine array. 1374 */ 1375 static void handle_D_option(char *z){ 1376 char **paz; 1377 nDefine++; 1378 azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); 1379 if( azDefine==0 ){ 1380 fprintf(stderr,"out of memory\n"); 1381 exit(1); 1382 } 1383 paz = &azDefine[nDefine-1]; 1384 *paz = (char *) malloc( lemonStrlen(z)+1 ); 1385 if( *paz==0 ){ 1386 fprintf(stderr,"out of memory\n"); 1387 exit(1); 1388 } 1389 strcpy(*paz, z); 1390 for(z=*paz; *z && *z!='='; z++){} 1391 *z = 0; 1392 } 1393 1394 static char *user_templatename = NULL; 1395 static void handle_T_option(char *z){ 1396 user_templatename = (char *) malloc( lemonStrlen(z)+1 ); 1397 if( user_templatename==0 ){ 1398 memory_error(); 1399 } 1400 strcpy(user_templatename, z); 1401 } 1402 1403 /* The main program. Parse the command line and do it... */ 1404 int main(int argc, char **argv) 1405 { 1406 static int version = 0; 1407 static int rpflag = 0; 1408 static int basisflag = 0; 1409 static int compress = 0; 1410 static int quiet = 0; 1411 static int statistics = 0; 1412 static int mhflag = 0; 1413 static int nolinenosflag = 0; 1414 static int noResort = 0; 1415 static struct s_options options[] = { 1416 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, 1417 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, 1418 {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."}, 1419 {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, 1420 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, 1421 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."}, 1422 {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."}, 1423 {OPT_FLAG, "p", (char*)&showPrecedenceConflict, 1424 "Show conflicts resolved by precedence rules"}, 1425 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, 1426 {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"}, 1427 {OPT_FLAG, "s", (char*)&statistics, 1428 "Print parser stats to standard output."}, 1429 {OPT_FLAG, "x", (char*)&version, "Print the version number."}, 1430 {OPT_FLAG,0,0,0} 1431 }; 1432 int i; 1433 int exitcode; 1434 struct lemon lem; 1435 1436 atexit(LemonAtExit); 1437 1438 OptInit(argv,options,stderr); 1439 if( version ){ 1440 printf("Lemon version 1.0\n"); 1441 exit(0); 1442 } 1443 if( OptNArgs()!=1 ){ 1444 fprintf(stderr,"Exactly one filename argument is required.\n"); 1445 exit(1); 1446 } 1447 memset(&lem, 0, sizeof(lem)); 1448 lem.errorcnt = 0; 1449 1450 /* Initialize the machine */ 1451 Strsafe_init(); 1452 Symbol_init(); 1453 State_init(); 1454 lem.argv0 = argv[0]; 1455 lem.filename = OptArg(0); 1456 lem.basisflag = basisflag; 1457 lem.nolinenosflag = nolinenosflag; 1458 Symbol_new("$"); 1459 lem.errsym = Symbol_new("error"); 1460 lem.errsym->useCnt = 0; 1461 1462 /* Parse the input file */ 1463 Parse(&lem); 1464 if( lem.errorcnt ) exit(lem.errorcnt); 1465 if( lem.nrule==0 ){ 1466 fprintf(stderr,"Empty grammar.\n"); 1467 exit(1); 1468 } 1469 1470 /* Count and index the symbols of the grammar */ 1471 lem.nsymbol = Symbol_count(); 1472 Symbol_new("{default}"); 1473 lem.symbols = Symbol_arrayof(); 1474 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1475 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp); 1476 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; 1477 for(i=1; isupper(lem.symbols[i]->name[0]); i++); 1478 lem.nterminal = i; 1479 1480 /* Generate a reprint of the grammar, if requested on the command line */ 1481 if( rpflag ){ 1482 Reprint(&lem); 1483 }else{ 1484 /* Initialize the size for all follow and first sets */ 1485 SetSize(lem.nterminal+1); 1486 1487 /* Find the precedence for every production rule (that has one) */ 1488 FindRulePrecedences(&lem); 1489 1490 /* Compute the lambda-nonterminals and the first-sets for every 1491 ** nonterminal */ 1492 FindFirstSets(&lem); 1493 1494 /* Compute all LR(0) states. Also record follow-set propagation 1495 ** links so that the follow-set can be computed later */ 1496 lem.nstate = 0; 1497 FindStates(&lem); 1498 lem.sorted = State_arrayof(); 1499 1500 /* Tie up loose ends on the propagation links */ 1501 FindLinks(&lem); 1502 1503 /* Compute the follow set of every reducible configuration */ 1504 FindFollowSets(&lem); 1505 1506 /* Compute the action tables */ 1507 FindActions(&lem); 1508 1509 /* Compress the action tables */ 1510 if( compress==0 ) CompressTables(&lem); 1511 1512 /* Reorder and renumber the states so that states with fewer choices 1513 ** occur at the end. This is an optimization that helps make the 1514 ** generated parser tables smaller. */ 1515 if( noResort==0 ) ResortStates(&lem); 1516 1517 /* Generate a report of the parser generated. (the "y.output" file) */ 1518 if( !quiet ) ReportOutput(&lem); 1519 1520 /* Generate the source code for the parser */ 1521 ReportTable(&lem, mhflag); 1522 1523 /* Produce a header file for use by the scanner. (This step is 1524 ** omitted if the "-m" option is used because makeheaders will 1525 ** generate the file for us.) */ 1526 if( !mhflag ) ReportHeader(&lem); 1527 } 1528 if( statistics ){ 1529 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", 1530 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); 1531 printf(" %d states, %d parser table entries, %d conflicts\n", 1532 lem.nstate, lem.tablesize, lem.nconflict); 1533 } 1534 if( lem.nconflict > 0 ){ 1535 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); 1536 } 1537 1538 /* return 0 on success, 1 on failure. */ 1539 exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; 1540 successful_exit = (exitcode == 0); 1541 exit(exitcode); 1542 return (exitcode); 1543 } 1544 /******************** From the file "msort.c" *******************************/ 1545 /* 1546 ** A generic merge-sort program. 1547 ** 1548 ** USAGE: 1549 ** Let "ptr" be a pointer to some structure which is at the head of 1550 ** a null-terminated list. Then to sort the list call: 1551 ** 1552 ** ptr = msort(ptr,&(ptr->next),cmpfnc); 1553 ** 1554 ** In the above, "cmpfnc" is a pointer to a function which compares 1555 ** two instances of the structure and returns an integer, as in 1556 ** strcmp. The second argument is a pointer to the pointer to the 1557 ** second element of the linked list. This address is used to compute 1558 ** the offset to the "next" field within the structure. The offset to 1559 ** the "next" field must be constant for all structures in the list. 1560 ** 1561 ** The function returns a new pointer which is the head of the list 1562 ** after sorting. 1563 ** 1564 ** ALGORITHM: 1565 ** Merge-sort. 1566 */ 1567 1568 /* 1569 ** Return a pointer to the next structure in the linked list. 1570 */ 1571 #define NEXT(A) (*(char**)(((unsigned long)A)+offset)) 1572 1573 /* 1574 ** Inputs: 1575 ** a: A sorted, null-terminated linked list. (May be null). 1576 ** b: A sorted, null-terminated linked list. (May be null). 1577 ** cmp: A pointer to the comparison function. 1578 ** offset: Offset in the structure to the "next" field. 1579 ** 1580 ** Return Value: 1581 ** A pointer to the head of a sorted list containing the elements 1582 ** of both a and b. 1583 ** 1584 ** Side effects: 1585 ** The "next" pointers for elements in the lists a and b are 1586 ** changed. 1587 */ 1588 static char *merge( 1589 char *a, 1590 char *b, 1591 int (*cmp)(const char*,const char*), 1592 int offset 1593 ){ 1594 char *ptr, *head; 1595 1596 if( a==0 ){ 1597 head = b; 1598 }else if( b==0 ){ 1599 head = a; 1600 }else{ 1601 if( (*cmp)(a,b)<=0 ){ 1602 ptr = a; 1603 a = NEXT(a); 1604 }else{ 1605 ptr = b; 1606 b = NEXT(b); 1607 } 1608 head = ptr; 1609 while( a && b ){ 1610 if( (*cmp)(a,b)<=0 ){ 1611 NEXT(ptr) = a; 1612 ptr = a; 1613 a = NEXT(a); 1614 }else{ 1615 NEXT(ptr) = b; 1616 ptr = b; 1617 b = NEXT(b); 1618 } 1619 } 1620 if( a ) NEXT(ptr) = a; 1621 else NEXT(ptr) = b; 1622 } 1623 return head; 1624 } 1625 1626 /* 1627 ** Inputs: 1628 ** list: Pointer to a singly-linked list of structures. 1629 ** next: Pointer to pointer to the second element of the list. 1630 ** cmp: A comparison function. 1631 ** 1632 ** Return Value: 1633 ** A pointer to the head of a sorted list containing the elements 1634 ** orginally in list. 1635 ** 1636 ** Side effects: 1637 ** The "next" pointers for elements in list are changed. 1638 */ 1639 #define LISTSIZE 30 1640 static char *msort( 1641 char *list, 1642 char **next, 1643 int (*cmp)(const char*,const char*) 1644 ){ 1645 unsigned long offset; 1646 char *ep; 1647 char *set[LISTSIZE]; 1648 int i; 1649 offset = (unsigned long)next - (unsigned long)list; 1650 for(i=0; i<LISTSIZE; i++) set[i] = 0; 1651 while( list ){ 1652 ep = list; 1653 list = NEXT(list); 1654 NEXT(ep) = 0; 1655 for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){ 1656 ep = merge(ep,set[i],cmp,offset); 1657 set[i] = 0; 1658 } 1659 set[i] = ep; 1660 } 1661 ep = 0; 1662 for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset); 1663 return ep; 1664 } 1665 /************************ From the file "option.c" **************************/ 1666 static char **argv; 1667 static struct s_options *op; 1668 static FILE *errstream; 1669 1670 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) 1671 1672 /* 1673 ** Print the command line with a carrot pointing to the k-th character 1674 ** of the n-th field. 1675 */ 1676 static void errline(int n, int k, FILE *err) 1677 { 1678 int spcnt, i; 1679 if( argv[0] ) fprintf(err,"%s",argv[0]); 1680 spcnt = lemonStrlen(argv[0]) + 1; 1681 for(i=1; i<n && argv[i]; i++){ 1682 fprintf(err," %s",argv[i]); 1683 spcnt += lemonStrlen(argv[i])+1; 1684 } 1685 spcnt += k; 1686 for(; argv[i]; i++) fprintf(err," %s",argv[i]); 1687 if( spcnt<20 ){ 1688 fprintf(err,"\n%*s^-- here\n",spcnt,""); 1689 }else{ 1690 fprintf(err,"\n%*shere --^\n",spcnt-7,""); 1691 } 1692 } 1693 1694 /* 1695 ** Return the index of the N-th non-switch argument. Return -1 1696 ** if N is out of range. 1697 */ 1698 static int argindex(int n) 1699 { 1700 int i; 1701 int dashdash = 0; 1702 if( argv!=0 && *argv!=0 ){ 1703 for(i=1; argv[i]; i++){ 1704 if( dashdash || !ISOPT(argv[i]) ){ 1705 if( n==0 ) return i; 1706 n--; 1707 } 1708 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1709 } 1710 } 1711 return -1; 1712 } 1713 1714 static char emsg[] = "Command line syntax error: "; 1715 1716 /* 1717 ** Process a flag command line argument. 1718 */ 1719 static int handleflags(int i, FILE *err) 1720 { 1721 int v; 1722 int errcnt = 0; 1723 int j; 1724 for(j=0; op[j].label; j++){ 1725 if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break; 1726 } 1727 v = argv[i][0]=='-' ? 1 : 0; 1728 if( op[j].label==0 ){ 1729 if( err ){ 1730 fprintf(err,"%sundefined option.\n",emsg); 1731 errline(i,1,err); 1732 } 1733 errcnt++; 1734 }else if( op[j].type==OPT_FLAG ){ 1735 *((int*)op[j].arg) = v; 1736 }else if( op[j].type==OPT_FFLAG ){ 1737 (*(void(*)(int))(op[j].arg))(v); 1738 }else if( op[j].type==OPT_FSTR ){ 1739 (*(void(*)(char *))(op[j].arg))(&argv[i][2]); 1740 }else{ 1741 if( err ){ 1742 fprintf(err,"%smissing argument on switch.\n",emsg); 1743 errline(i,1,err); 1744 } 1745 errcnt++; 1746 } 1747 return errcnt; 1748 } 1749 1750 /* 1751 ** Process a command line switch which has an argument. 1752 */ 1753 static int handleswitch(int i, FILE *err) 1754 { 1755 int lv = 0; 1756 double dv = 0.0; 1757 char *sv = 0, *end; 1758 char *cp; 1759 int j; 1760 int errcnt = 0; 1761 cp = strchr(argv[i],'='); 1762 assert( cp!=0 ); 1763 *cp = 0; 1764 for(j=0; op[j].label; j++){ 1765 if( strcmp(argv[i],op[j].label)==0 ) break; 1766 } 1767 *cp = '='; 1768 if( op[j].label==0 ){ 1769 if( err ){ 1770 fprintf(err,"%sundefined option.\n",emsg); 1771 errline(i,0,err); 1772 } 1773 errcnt++; 1774 }else{ 1775 cp++; 1776 switch( op[j].type ){ 1777 case OPT_FLAG: 1778 case OPT_FFLAG: 1779 if( err ){ 1780 fprintf(err,"%soption requires an argument.\n",emsg); 1781 errline(i,0,err); 1782 } 1783 errcnt++; 1784 break; 1785 case OPT_DBL: 1786 case OPT_FDBL: 1787 dv = strtod(cp,&end); 1788 if( *end ){ 1789 if( err ){ 1790 fprintf(err,"%sillegal character in floating-point argument.\n",emsg); 1791 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1792 } 1793 errcnt++; 1794 } 1795 break; 1796 case OPT_INT: 1797 case OPT_FINT: 1798 lv = strtol(cp,&end,0); 1799 if( *end ){ 1800 if( err ){ 1801 fprintf(err,"%sillegal character in integer argument.\n",emsg); 1802 errline(i,((unsigned long)end)-(unsigned long)argv[i],err); 1803 } 1804 errcnt++; 1805 } 1806 break; 1807 case OPT_STR: 1808 case OPT_FSTR: 1809 sv = cp; 1810 break; 1811 } 1812 switch( op[j].type ){ 1813 case OPT_FLAG: 1814 case OPT_FFLAG: 1815 break; 1816 case OPT_DBL: 1817 *(double*)(op[j].arg) = dv; 1818 break; 1819 case OPT_FDBL: 1820 (*(void(*)(double))(op[j].arg))(dv); 1821 break; 1822 case OPT_INT: 1823 *(int*)(op[j].arg) = lv; 1824 break; 1825 case OPT_FINT: 1826 (*(void(*)(int))(op[j].arg))((int)lv); 1827 break; 1828 case OPT_STR: 1829 *(char**)(op[j].arg) = sv; 1830 break; 1831 case OPT_FSTR: 1832 (*(void(*)(char *))(op[j].arg))(sv); 1833 break; 1834 } 1835 } 1836 return errcnt; 1837 } 1838 1839 int OptInit(char **a, struct s_options *o, FILE *err) 1840 { 1841 int errcnt = 0; 1842 argv = a; 1843 op = o; 1844 errstream = err; 1845 if( argv && *argv && op ){ 1846 int i; 1847 for(i=1; argv[i]; i++){ 1848 if( argv[i][0]=='+' || argv[i][0]=='-' ){ 1849 errcnt += handleflags(i,err); 1850 }else if( strchr(argv[i],'=') ){ 1851 errcnt += handleswitch(i,err); 1852 } 1853 } 1854 } 1855 if( errcnt>0 ){ 1856 fprintf(err,"Valid command line options for \"%s\" are:\n",*a); 1857 OptPrint(); 1858 exit(1); 1859 } 1860 return 0; 1861 } 1862 1863 int OptNArgs(){ 1864 int cnt = 0; 1865 int dashdash = 0; 1866 int i; 1867 if( argv!=0 && argv[0]!=0 ){ 1868 for(i=1; argv[i]; i++){ 1869 if( dashdash || !ISOPT(argv[i]) ) cnt++; 1870 if( strcmp(argv[i],"--")==0 ) dashdash = 1; 1871 } 1872 } 1873 return cnt; 1874 } 1875 1876 char *OptArg(int n) 1877 { 1878 int i; 1879 i = argindex(n); 1880 return i>=0 ? argv[i] : 0; 1881 } 1882 1883 void OptErr(int n) 1884 { 1885 int i; 1886 i = argindex(n); 1887 if( i>=0 ) errline(i,0,errstream); 1888 } 1889 1890 void OptPrint(){ 1891 int i; 1892 int max, len; 1893 max = 0; 1894 for(i=0; op[i].label; i++){ 1895 len = lemonStrlen(op[i].label) + 1; 1896 switch( op[i].type ){ 1897 case OPT_FLAG: 1898 case OPT_FFLAG: 1899 break; 1900 case OPT_INT: 1901 case OPT_FINT: 1902 len += 9; /* length of "<integer>" */ 1903 break; 1904 case OPT_DBL: 1905 case OPT_FDBL: 1906 len += 6; /* length of "<real>" */ 1907 break; 1908 case OPT_STR: 1909 case OPT_FSTR: 1910 len += 8; /* length of "<string>" */ 1911 break; 1912 } 1913 if( len>max ) max = len; 1914 } 1915 for(i=0; op[i].label; i++){ 1916 switch( op[i].type ){ 1917 case OPT_FLAG: 1918 case OPT_FFLAG: 1919 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message); 1920 break; 1921 case OPT_INT: 1922 case OPT_FINT: 1923 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label, 1924 (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message); 1925 break; 1926 case OPT_DBL: 1927 case OPT_FDBL: 1928 fprintf(errstream," %s=<real>%*s %s\n",op[i].label, 1929 (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message); 1930 break; 1931 case OPT_STR: 1932 case OPT_FSTR: 1933 fprintf(errstream," %s=<string>%*s %s\n",op[i].label, 1934 (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message); 1935 break; 1936 } 1937 } 1938 } 1939 /*********************** From the file "parse.c" ****************************/ 1940 /* 1941 ** Input file parser for the LEMON parser generator. 1942 */ 1943 1944 /* The state of the parser */ 1945 enum e_state { 1946 INITIALIZE, 1947 WAITING_FOR_DECL_OR_RULE, 1948 WAITING_FOR_DECL_KEYWORD, 1949 WAITING_FOR_DECL_ARG, 1950 WAITING_FOR_PRECEDENCE_SYMBOL, 1951 WAITING_FOR_ARROW, 1952 IN_RHS, 1953 LHS_ALIAS_1, 1954 LHS_ALIAS_2, 1955 LHS_ALIAS_3, 1956 RHS_ALIAS_1, 1957 RHS_ALIAS_2, 1958 PRECEDENCE_MARK_1, 1959 PRECEDENCE_MARK_2, 1960 RESYNC_AFTER_RULE_ERROR, 1961 RESYNC_AFTER_DECL_ERROR, 1962 WAITING_FOR_DESTRUCTOR_SYMBOL, 1963 WAITING_FOR_DATATYPE_SYMBOL, 1964 WAITING_FOR_FALLBACK_ID, 1965 WAITING_FOR_WILDCARD_ID 1966 }; 1967 struct pstate { 1968 char *filename; /* Name of the input file */ 1969 int tokenlineno; /* Linenumber at which current token starts */ 1970 int errorcnt; /* Number of errors so far */ 1971 char *tokenstart; /* Text of current token */ 1972 struct lemon *gp; /* Global state vector */ 1973 enum e_state state; /* The state of the parser */ 1974 struct symbol *fallback; /* The fallback token */ 1975 struct symbol *lhs; /* Left-hand side of current rule */ 1976 const char *lhsalias; /* Alias for the LHS */ 1977 int nrhs; /* Number of right-hand side symbols seen */ 1978 struct symbol *rhs[MAXRHS]; /* RHS symbols */ 1979 const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ 1980 struct rule *prevrule; /* Previous rule parsed */ 1981 const char *declkeyword; /* Keyword of a declaration */ 1982 char **declargslot; /* Where the declaration argument should be put */ 1983 int insertLineMacro; /* Add #line before declaration insert */ 1984 int *decllinenoslot; /* Where to write declaration line number */ 1985 enum e_assoc declassoc; /* Assign this association to decl arguments */ 1986 int preccounter; /* Assign this precedence to decl arguments */ 1987 struct rule *firstrule; /* Pointer to first rule in the grammar */ 1988 struct rule *lastrule; /* Pointer to the most recently parsed rule */ 1989 }; 1990 1991 /* Parse a single token */ 1992 static void parseonetoken(struct pstate *psp) 1993 { 1994 const char *x; 1995 x = Strsafe(psp->tokenstart); /* Save the token permanently */ 1996 #if 0 1997 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, 1998 x,psp->state); 1999 #endif 2000 switch( psp->state ){ 2001 case INITIALIZE: 2002 psp->prevrule = 0; 2003 psp->preccounter = 0; 2004 psp->firstrule = psp->lastrule = 0; 2005 psp->gp->nrule = 0; 2006 /* Fall thru to next case */ 2007 case WAITING_FOR_DECL_OR_RULE: 2008 if( x[0]=='%' ){ 2009 psp->state = WAITING_FOR_DECL_KEYWORD; 2010 }else if( islower(x[0]) ){ 2011 psp->lhs = Symbol_new(x); 2012 psp->nrhs = 0; 2013 psp->lhsalias = 0; 2014 psp->state = WAITING_FOR_ARROW; 2015 }else if( x[0]=='{' ){ 2016 if( psp->prevrule==0 ){ 2017 ErrorMsg(psp->filename,psp->tokenlineno, 2018 "There is no prior rule opon which to attach the code \ 2019 fragment which begins on this line."); 2020 psp->errorcnt++; 2021 }else if( psp->prevrule->code!=0 ){ 2022 ErrorMsg(psp->filename,psp->tokenlineno, 2023 "Code fragment beginning on this line is not the first \ 2024 to follow the previous rule."); 2025 psp->errorcnt++; 2026 }else{ 2027 psp->prevrule->line = psp->tokenlineno; 2028 psp->prevrule->code = &x[1]; 2029 } 2030 }else if( x[0]=='[' ){ 2031 psp->state = PRECEDENCE_MARK_1; 2032 }else{ 2033 ErrorMsg(psp->filename,psp->tokenlineno, 2034 "Token \"%s\" should be either \"%%\" or a nonterminal name.", 2035 x); 2036 psp->errorcnt++; 2037 } 2038 break; 2039 case PRECEDENCE_MARK_1: 2040 if( !isupper(x[0]) ){ 2041 ErrorMsg(psp->filename,psp->tokenlineno, 2042 "The precedence symbol must be a terminal."); 2043 psp->errorcnt++; 2044 }else if( psp->prevrule==0 ){ 2045 ErrorMsg(psp->filename,psp->tokenlineno, 2046 "There is no prior rule to assign precedence \"[%s]\".",x); 2047 psp->errorcnt++; 2048 }else if( psp->prevrule->precsym!=0 ){ 2049 ErrorMsg(psp->filename,psp->tokenlineno, 2050 "Precedence mark on this line is not the first \ 2051 to follow the previous rule."); 2052 psp->errorcnt++; 2053 }else{ 2054 psp->prevrule->precsym = Symbol_new(x); 2055 } 2056 psp->state = PRECEDENCE_MARK_2; 2057 break; 2058 case PRECEDENCE_MARK_2: 2059 if( x[0]!=']' ){ 2060 ErrorMsg(psp->filename,psp->tokenlineno, 2061 "Missing \"]\" on precedence mark."); 2062 psp->errorcnt++; 2063 } 2064 psp->state = WAITING_FOR_DECL_OR_RULE; 2065 break; 2066 case WAITING_FOR_ARROW: 2067 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2068 psp->state = IN_RHS; 2069 }else if( x[0]=='(' ){ 2070 psp->state = LHS_ALIAS_1; 2071 }else{ 2072 ErrorMsg(psp->filename,psp->tokenlineno, 2073 "Expected to see a \":\" following the LHS symbol \"%s\".", 2074 psp->lhs->name); 2075 psp->errorcnt++; 2076 psp->state = RESYNC_AFTER_RULE_ERROR; 2077 } 2078 break; 2079 case LHS_ALIAS_1: 2080 if( isalpha(x[0]) ){ 2081 psp->lhsalias = x; 2082 psp->state = LHS_ALIAS_2; 2083 }else{ 2084 ErrorMsg(psp->filename,psp->tokenlineno, 2085 "\"%s\" is not a valid alias for the LHS \"%s\"\n", 2086 x,psp->lhs->name); 2087 psp->errorcnt++; 2088 psp->state = RESYNC_AFTER_RULE_ERROR; 2089 } 2090 break; 2091 case LHS_ALIAS_2: 2092 if( x[0]==')' ){ 2093 psp->state = LHS_ALIAS_3; 2094 }else{ 2095 ErrorMsg(psp->filename,psp->tokenlineno, 2096 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2097 psp->errorcnt++; 2098 psp->state = RESYNC_AFTER_RULE_ERROR; 2099 } 2100 break; 2101 case LHS_ALIAS_3: 2102 if( x[0]==':' && x[1]==':' && x[2]=='=' ){ 2103 psp->state = IN_RHS; 2104 }else{ 2105 ErrorMsg(psp->filename,psp->tokenlineno, 2106 "Missing \"->\" following: \"%s(%s)\".", 2107 psp->lhs->name,psp->lhsalias); 2108 psp->errorcnt++; 2109 psp->state = RESYNC_AFTER_RULE_ERROR; 2110 } 2111 break; 2112 case IN_RHS: 2113 if( x[0]=='.' ){ 2114 struct rule *rp; 2115 rp = (struct rule *)calloc( sizeof(struct rule) + 2116 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); 2117 if( rp==0 ){ 2118 ErrorMsg(psp->filename,psp->tokenlineno, 2119 "Can't allocate enough memory for this rule."); 2120 psp->errorcnt++; 2121 psp->prevrule = 0; 2122 }else{ 2123 int i; 2124 rp->ruleline = psp->tokenlineno; 2125 rp->rhs = (struct symbol**)&rp[1]; 2126 rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); 2127 for(i=0; i<psp->nrhs; i++){ 2128 rp->rhs[i] = psp->rhs[i]; 2129 rp->rhsalias[i] = psp->alias[i]; 2130 } 2131 rp->lhs = psp->lhs; 2132 rp->lhsalias = psp->lhsalias; 2133 rp->nrhs = psp->nrhs; 2134 rp->code = 0; 2135 rp->precsym = 0; 2136 rp->index = psp->gp->nrule++; 2137 rp->nextlhs = rp->lhs->rule; 2138 rp->lhs->rule = rp; 2139 rp->next = 0; 2140 if( psp->firstrule==0 ){ 2141 psp->firstrule = psp->lastrule = rp; 2142 }else{ 2143 psp->lastrule->next = rp; 2144 psp->lastrule = rp; 2145 } 2146 psp->prevrule = rp; 2147 } 2148 psp->state = WAITING_FOR_DECL_OR_RULE; 2149 }else if( isalpha(x[0]) ){ 2150 if( psp->nrhs>=MAXRHS ){ 2151 ErrorMsg(psp->filename,psp->tokenlineno, 2152 "Too many symbols on RHS of rule beginning at \"%s\".", 2153 x); 2154 psp->errorcnt++; 2155 psp->state = RESYNC_AFTER_RULE_ERROR; 2156 }else{ 2157 psp->rhs[psp->nrhs] = Symbol_new(x); 2158 psp->alias[psp->nrhs] = 0; 2159 psp->nrhs++; 2160 } 2161 }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ 2162 struct symbol *msp = psp->rhs[psp->nrhs-1]; 2163 if( msp->type!=MULTITERMINAL ){ 2164 struct symbol *origsp = msp; 2165 msp = (struct symbol *) calloc(1,sizeof(*msp)); 2166 memset(msp, 0, sizeof(*msp)); 2167 msp->type = MULTITERMINAL; 2168 msp->nsubsym = 1; 2169 msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); 2170 msp->subsym[0] = origsp; 2171 msp->name = origsp->name; 2172 psp->rhs[psp->nrhs-1] = msp; 2173 } 2174 msp->nsubsym++; 2175 msp->subsym = (struct symbol **) realloc(msp->subsym, 2176 sizeof(struct symbol*)*msp->nsubsym); 2177 msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); 2178 if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){ 2179 ErrorMsg(psp->filename,psp->tokenlineno, 2180 "Cannot form a compound containing a non-terminal"); 2181 psp->errorcnt++; 2182 } 2183 }else if( x[0]=='(' && psp->nrhs>0 ){ 2184 psp->state = RHS_ALIAS_1; 2185 }else{ 2186 ErrorMsg(psp->filename,psp->tokenlineno, 2187 "Illegal character on RHS of rule: \"%s\".",x); 2188 psp->errorcnt++; 2189 psp->state = RESYNC_AFTER_RULE_ERROR; 2190 } 2191 break; 2192 case RHS_ALIAS_1: 2193 if( isalpha(x[0]) ){ 2194 psp->alias[psp->nrhs-1] = x; 2195 psp->state = RHS_ALIAS_2; 2196 }else{ 2197 ErrorMsg(psp->filename,psp->tokenlineno, 2198 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", 2199 x,psp->rhs[psp->nrhs-1]->name); 2200 psp->errorcnt++; 2201 psp->state = RESYNC_AFTER_RULE_ERROR; 2202 } 2203 break; 2204 case RHS_ALIAS_2: 2205 if( x[0]==')' ){ 2206 psp->state = IN_RHS; 2207 }else{ 2208 ErrorMsg(psp->filename,psp->tokenlineno, 2209 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); 2210 psp->errorcnt++; 2211 psp->state = RESYNC_AFTER_RULE_ERROR; 2212 } 2213 break; 2214 case WAITING_FOR_DECL_KEYWORD: 2215 if( isalpha(x[0]) ){ 2216 psp->declkeyword = x; 2217 psp->declargslot = 0; 2218 psp->decllinenoslot = 0; 2219 psp->insertLineMacro = 1; 2220 psp->state = WAITING_FOR_DECL_ARG; 2221 if( strcmp(x,"name")==0 ){ 2222 psp->declargslot = &(psp->gp->name); 2223 psp->insertLineMacro = 0; 2224 }else if( strcmp(x,"include")==0 ){ 2225 psp->declargslot = &(psp->gp->include); 2226 }else if( strcmp(x,"code")==0 ){ 2227 psp->declargslot = &(psp->gp->extracode); 2228 }else if( strcmp(x,"token_destructor")==0 ){ 2229 psp->declargslot = &psp->gp->tokendest; 2230 }else if( strcmp(x,"default_destructor")==0 ){ 2231 psp->declargslot = &psp->gp->vardest; 2232 }else if( strcmp(x,"token_prefix")==0 ){ 2233 psp->declargslot = &psp->gp->tokenprefix; 2234 psp->insertLineMacro = 0; 2235 }else if( strcmp(x,"syntax_error")==0 ){ 2236 psp->declargslot = &(psp->gp->error); 2237 }else if( strcmp(x,"parse_accept")==0 ){ 2238 psp->declargslot = &(psp->gp->accept); 2239 }else if( strcmp(x,"parse_failure")==0 ){ 2240 psp->declargslot = &(psp->gp->failure); 2241 }else if( strcmp(x,"stack_overflow")==0 ){ 2242 psp->declargslot = &(psp->gp->overflow); 2243 }else if( strcmp(x,"extra_argument")==0 ){ 2244 psp->declargslot = &(psp->gp->arg); 2245 psp->insertLineMacro = 0; 2246 }else if( strcmp(x,"token_type")==0 ){ 2247 psp->declargslot = &(psp->gp->tokentype); 2248 psp->insertLineMacro = 0; 2249 }else if( strcmp(x,"default_type")==0 ){ 2250 psp->declargslot = &(psp->gp->vartype); 2251 psp->insertLineMacro = 0; 2252 }else if( strcmp(x,"stack_size")==0 ){ 2253 psp->declargslot = &(psp->gp->stacksize); 2254 psp->insertLineMacro = 0; 2255 }else if( strcmp(x,"start_symbol")==0 ){ 2256 psp->declargslot = &(psp->gp->start); 2257 psp->insertLineMacro = 0; 2258 }else if( strcmp(x,"left")==0 ){ 2259 psp->preccounter++; 2260 psp->declassoc = LEFT; 2261 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2262 }else if( strcmp(x,"right")==0 ){ 2263 psp->preccounter++; 2264 psp->declassoc = RIGHT; 2265 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2266 }else if( strcmp(x,"nonassoc")==0 ){ 2267 psp->preccounter++; 2268 psp->declassoc = NONE; 2269 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; 2270 }else if( strcmp(x,"destructor")==0 ){ 2271 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; 2272 }else if( strcmp(x,"type")==0 ){ 2273 psp->state = WAITING_FOR_DATATYPE_SYMBOL; 2274 }else if( strcmp(x,"fallback")==0 ){ 2275 psp->fallback = 0; 2276 psp->state = WAITING_FOR_FALLBACK_ID; 2277 }else if( strcmp(x,"wildcard")==0 ){ 2278 psp->state = WAITING_FOR_WILDCARD_ID; 2279 }else{ 2280 ErrorMsg(psp->filename,psp->tokenlineno, 2281 "Unknown declaration keyword: \"%%%s\".",x); 2282 psp->errorcnt++; 2283 psp->state = RESYNC_AFTER_DECL_ERROR; 2284 } 2285 }else{ 2286 ErrorMsg(psp->filename,psp->tokenlineno, 2287 "Illegal declaration keyword: \"%s\".",x); 2288 psp->errorcnt++; 2289 psp->state = RESYNC_AFTER_DECL_ERROR; 2290 } 2291 break; 2292 case WAITING_FOR_DESTRUCTOR_SYMBOL: 2293 if( !isalpha(x[0]) ){ 2294 ErrorMsg(psp->filename,psp->tokenlineno, 2295 "Symbol name missing after %%destructor keyword"); 2296 psp->errorcnt++; 2297 psp->state = RESYNC_AFTER_DECL_ERROR; 2298 }else{ 2299 struct symbol *sp = Symbol_new(x); 2300 psp->declargslot = &sp->destructor; 2301 psp->decllinenoslot = &sp->destLineno; 2302 psp->insertLineMacro = 1; 2303 psp->state = WAITING_FOR_DECL_ARG; 2304 } 2305 break; 2306 case WAITING_FOR_DATATYPE_SYMBOL: 2307 if( !isalpha(x[0]) ){ 2308 ErrorMsg(psp->filename,psp->tokenlineno, 2309 "Symbol name missing after %%type keyword"); 2310 psp->errorcnt++; 2311 psp->state = RESYNC_AFTER_DECL_ERROR; 2312 }else{ 2313 struct symbol *sp = Symbol_find(x); 2314 if((sp) && (sp->datatype)){ 2315 ErrorMsg(psp->filename,psp->tokenlineno, 2316 "Symbol %%type \"%s\" already defined", x); 2317 psp->errorcnt++; 2318 psp->state = RESYNC_AFTER_DECL_ERROR; 2319 }else{ 2320 if (!sp){ 2321 sp = Symbol_new(x); 2322 } 2323 psp->declargslot = &sp->datatype; 2324 psp->insertLineMacro = 0; 2325 psp->state = WAITING_FOR_DECL_ARG; 2326 } 2327 } 2328 break; 2329 case WAITING_FOR_PRECEDENCE_SYMBOL: 2330 if( x[0]=='.' ){ 2331 psp->state = WAITING_FOR_DECL_OR_RULE; 2332 }else if( isupper(x[0]) ){ 2333 struct symbol *sp; 2334 sp = Symbol_new(x); 2335 if( sp->prec>=0 ){ 2336 ErrorMsg(psp->filename,psp->tokenlineno, 2337 "Symbol \"%s\" has already be given a precedence.",x); 2338 psp->errorcnt++; 2339 }else{ 2340 sp->prec = psp->preccounter; 2341 sp->assoc = psp->declassoc; 2342 } 2343 }else{ 2344 ErrorMsg(psp->filename,psp->tokenlineno, 2345 "Can't assign a precedence to \"%s\".",x); 2346 psp->errorcnt++; 2347 } 2348 break; 2349 case WAITING_FOR_DECL_ARG: 2350 if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){ 2351 const char *zOld, *zNew; 2352 char *zBuf, *z; 2353 int nOld, n, nLine, nNew, nBack; 2354 int addLineMacro; 2355 char zLine[50]; 2356 zNew = x; 2357 if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; 2358 nNew = lemonStrlen(zNew); 2359 if( *psp->declargslot ){ 2360 zOld = *psp->declargslot; 2361 }else{ 2362 zOld = ""; 2363 } 2364 nOld = lemonStrlen(zOld); 2365 n = nOld + nNew + 20; 2366 addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && 2367 (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); 2368 if( addLineMacro ){ 2369 for(z=psp->filename, nBack=0; *z; z++){ 2370 if( *z=='\\' ) nBack++; 2371 } 2372 sprintf(zLine, "#line %d ", psp->tokenlineno); 2373 nLine = lemonStrlen(zLine); 2374 n += nLine + lemonStrlen(psp->filename) + nBack; 2375 } 2376 *psp->declargslot = (char *) realloc(*psp->declargslot, n); 2377 zBuf = *psp->declargslot + nOld; 2378 if( addLineMacro ){ 2379 if( nOld && zBuf[-1]!='\n' ){ 2380 *(zBuf++) = '\n'; 2381 } 2382 memcpy(zBuf, zLine, nLine); 2383 zBuf += nLine; 2384 *(zBuf++) = '"'; 2385 for(z=psp->filename; *z; z++){ 2386 if( *z=='\\' ){ 2387 *(zBuf++) = '\\'; 2388 } 2389 *(zBuf++) = *z; 2390 } 2391 *(zBuf++) = '"'; 2392 *(zBuf++) = '\n'; 2393 } 2394 if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ 2395 psp->decllinenoslot[0] = psp->tokenlineno; 2396 } 2397 memcpy(zBuf, zNew, nNew); 2398 zBuf += nNew; 2399 *zBuf = 0; 2400 psp->state = WAITING_FOR_DECL_OR_RULE; 2401 }else{ 2402 ErrorMsg(psp->filename,psp->tokenlineno, 2403 "Illegal argument to %%%s: %s",psp->declkeyword,x); 2404 psp->errorcnt++; 2405 psp->state = RESYNC_AFTER_DECL_ERROR; 2406 } 2407 break; 2408 case WAITING_FOR_FALLBACK_ID: 2409 if( x[0]=='.' ){ 2410 psp->state = WAITING_FOR_DECL_OR_RULE; 2411 }else if( !isupper(x[0]) ){ 2412 ErrorMsg(psp->filename, psp->tokenlineno, 2413 "%%fallback argument \"%s\" should be a token", x); 2414 psp->errorcnt++; 2415 }else{ 2416 struct symbol *sp = Symbol_new(x); 2417 if( psp->fallback==0 ){ 2418 psp->fallback = sp; 2419 }else if( sp->fallback ){ 2420 ErrorMsg(psp->filename, psp->tokenlineno, 2421 "More than one fallback assigned to token %s", x); 2422 psp->errorcnt++; 2423 }else{ 2424 sp->fallback = psp->fallback; 2425 psp->gp->has_fallback = 1; 2426 } 2427 } 2428 break; 2429 case WAITING_FOR_WILDCARD_ID: 2430 if( x[0]=='.' ){ 2431 psp->state = WAITING_FOR_DECL_OR_RULE; 2432 }else if( !isupper(x[0]) ){ 2433 ErrorMsg(psp->filename, psp->tokenlineno, 2434 "%%wildcard argument \"%s\" should be a token", x); 2435 psp->errorcnt++; 2436 }else{ 2437 struct symbol *sp = Symbol_new(x); 2438 if( psp->gp->wildcard==0 ){ 2439 psp->gp->wildcard = sp; 2440 }else{ 2441 ErrorMsg(psp->filename, psp->tokenlineno, 2442 "Extra wildcard to token: %s", x); 2443 psp->errorcnt++; 2444 } 2445 } 2446 break; 2447 case RESYNC_AFTER_RULE_ERROR: 2448 /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2449 ** break; */ 2450 case RESYNC_AFTER_DECL_ERROR: 2451 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; 2452 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; 2453 break; 2454 } 2455 } 2456 2457 /* Run the preprocessor over the input file text. The global variables 2458 ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined 2459 ** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and 2460 ** comments them out. Text in between is also commented out as appropriate. 2461 */ 2462 static void preprocess_input(char *z){ 2463 int i, j, k, n; 2464 int exclude = 0; 2465 int start = 0; 2466 int lineno = 1; 2467 int start_lineno = 1; 2468 for(i=0; z[i]; i++){ 2469 if( z[i]=='\n' ) lineno++; 2470 if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; 2471 if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){ 2472 if( exclude ){ 2473 exclude--; 2474 if( exclude==0 ){ 2475 for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; 2476 } 2477 } 2478 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; 2479 }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6])) 2480 || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){ 2481 if( exclude ){ 2482 exclude++; 2483 }else{ 2484 for(j=i+7; isspace(z[j]); j++){} 2485 for(n=0; z[j+n] && !isspace(z[j+n]); n++){} 2486 exclude = 1; 2487 for(k=0; k<nDefine; k++){ 2488 if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){ 2489 exclude = 0; 2490 break; 2491 } 2492 } 2493 if( z[i+3]=='n' ) exclude = !exclude; 2494 if( exclude ){ 2495 start = i; 2496 start_lineno = lineno; 2497 } 2498 } 2499 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; 2500 } 2501 } 2502 if( exclude ){ 2503 fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno); 2504 exit(1); 2505 } 2506 } 2507 2508 /* In spite of its name, this function is really a scanner. It read 2509 ** in the entire input file (all at once) then tokenizes it. Each 2510 ** token is passed to the function "parseonetoken" which builds all 2511 ** the appropriate data structures in the global state vector "gp". 2512 */ 2513 void Parse(struct lemon *gp) 2514 { 2515 struct pstate ps; 2516 FILE *fp; 2517 char *filebuf; 2518 int filesize; 2519 int lineno; 2520 int c; 2521 char *cp, *nextcp; 2522 int startline = 0; 2523 2524 memset(&ps, '\0', sizeof(ps)); 2525 ps.gp = gp; 2526 ps.filename = gp->filename; 2527 ps.errorcnt = 0; 2528 ps.state = INITIALIZE; 2529 2530 /* Begin by reading the input file */ 2531 fp = fopen(ps.filename,"rb"); 2532 if( fp==0 ){ 2533 ErrorMsg(ps.filename,0,"Can't open this file for reading."); 2534 gp->errorcnt++; 2535 return; 2536 } 2537 fseek(fp,0,2); 2538 filesize = ftell(fp); 2539 rewind(fp); 2540 filebuf = (char *)malloc( filesize+1 ); 2541 if( filebuf==0 ){ 2542 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.", 2543 filesize+1); 2544 gp->errorcnt++; 2545 return; 2546 } 2547 if( fread(filebuf,1,filesize,fp)!=filesize ){ 2548 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", 2549 filesize); 2550 free(filebuf); 2551 gp->errorcnt++; 2552 return; 2553 } 2554 fclose(fp); 2555 filebuf[filesize] = 0; 2556 2557 /* Make an initial pass through the file to handle %ifdef and %ifndef */ 2558 preprocess_input(filebuf); 2559 2560 /* Now scan the text of the input file */ 2561 lineno = 1; 2562 for(cp=filebuf; (c= *cp)!=0; ){ 2563 if( c=='\n' ) lineno++; /* Keep track of the line number */ 2564 if( isspace(c) ){ cp++; continue; } /* Skip all white space */ 2565 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ 2566 cp+=2; 2567 while( (c= *cp)!=0 && c!='\n' ) cp++; 2568 continue; 2569 } 2570 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ 2571 cp+=2; 2572 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ 2573 if( c=='\n' ) lineno++; 2574 cp++; 2575 } 2576 if( c ) cp++; 2577 continue; 2578 } 2579 ps.tokenstart = cp; /* Mark the beginning of the token */ 2580 ps.tokenlineno = lineno; /* Linenumber on which token begins */ 2581 if( c=='\"' ){ /* String literals */ 2582 cp++; 2583 while( (c= *cp)!=0 && c!='\"' ){ 2584 if( c=='\n' ) lineno++; 2585 cp++; 2586 } 2587 if( c==0 ){ 2588 ErrorMsg(ps.filename,startline, 2589 "String starting on this line is not terminated before the end of the file."); 2590 ps.errorcnt++; 2591 nextcp = cp; 2592 }else{ 2593 nextcp = cp+1; 2594 } 2595 }else if( c=='{' ){ /* A block of C code */ 2596 int level; 2597 cp++; 2598 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ 2599 if( c=='\n' ) lineno++; 2600 else if( c=='{' ) level++; 2601 else if( c=='}' ) level--; 2602 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ 2603 int prevc; 2604 cp = &cp[2]; 2605 prevc = 0; 2606 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ 2607 if( c=='\n' ) lineno++; 2608 prevc = c; 2609 cp++; 2610 } 2611 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ 2612 cp = &cp[2]; 2613 while( (c= *cp)!=0 && c!='\n' ) cp++; 2614 if( c ) lineno++; 2615 }else if( c=='\'' || c=='\"' ){ /* String a character literals */ 2616 int startchar, prevc; 2617 startchar = c; 2618 prevc = 0; 2619 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ 2620 if( c=='\n' ) lineno++; 2621 if( prevc=='\\' ) prevc = 0; 2622 else prevc = c; 2623 } 2624 } 2625 } 2626 if( c==0 ){ 2627 ErrorMsg(ps.filename,ps.tokenlineno, 2628 "C code starting on this line is not terminated before the end of the file."); 2629 ps.errorcnt++; 2630 nextcp = cp; 2631 }else{ 2632 nextcp = cp+1; 2633 } 2634 }else if( isalnum(c) ){ /* Identifiers */ 2635 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2636 nextcp = cp; 2637 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ 2638 cp += 3; 2639 nextcp = cp; 2640 }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){ 2641 cp += 2; 2642 while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++; 2643 nextcp = cp; 2644 }else{ /* All other (one character) operators */ 2645 cp++; 2646 nextcp = cp; 2647 } 2648 c = *cp; 2649 *cp = 0; /* Null terminate the token */ 2650 parseonetoken(&ps); /* Parse the token */ 2651 *cp = c; /* Restore the buffer */ 2652 cp = nextcp; 2653 } 2654 free(filebuf); /* Release the buffer after parsing */ 2655 gp->rule = ps.firstrule; 2656 gp->errorcnt = ps.errorcnt; 2657 } 2658 /*************************** From the file "plink.c" *********************/ 2659 /* 2660 ** Routines processing configuration follow-set propagation links 2661 ** in the LEMON parser generator. 2662 */ 2663 static struct plink *plink_freelist = 0; 2664 2665 /* Allocate a new plink */ 2666 struct plink *Plink_new(){ 2667 struct plink *newlink; 2668 2669 if( plink_freelist==0 ){ 2670 int i; 2671 int amt = 100; 2672 plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); 2673 if( plink_freelist==0 ){ 2674 fprintf(stderr, 2675 "Unable to allocate memory for a new follow-set propagation link.\n"); 2676 exit(1); 2677 } 2678 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; 2679 plink_freelist[amt-1].next = 0; 2680 } 2681 newlink = plink_freelist; 2682 plink_freelist = plink_freelist->next; 2683 return newlink; 2684 } 2685 2686 /* Add a plink to a plink list */ 2687 void Plink_add(struct plink **plpp, struct config *cfp) 2688 { 2689 struct plink *newlink; 2690 newlink = Plink_new(); 2691 newlink->next = *plpp; 2692 *plpp = newlink; 2693 newlink->cfp = cfp; 2694 } 2695 2696 /* Transfer every plink on the list "from" to the list "to" */ 2697 void Plink_copy(struct plink **to, struct plink *from) 2698 { 2699 struct plink *nextpl; 2700 while( from ){ 2701 nextpl = from->next; 2702 from->next = *to; 2703 *to = from; 2704 from = nextpl; 2705 } 2706 } 2707 2708 /* Delete every plink on the list */ 2709 void Plink_delete(struct plink *plp) 2710 { 2711 struct plink *nextpl; 2712 2713 while( plp ){ 2714 nextpl = plp->next; 2715 plp->next = plink_freelist; 2716 plink_freelist = plp; 2717 plp = nextpl; 2718 } 2719 } 2720 /*********************** From the file "report.c" **************************/ 2721 /* 2722 ** Procedures for generating reports and tables in the LEMON parser generator. 2723 */ 2724 2725 /* Generate a filename with the given suffix. Space to hold the 2726 ** name comes from malloc() and must be freed by the calling 2727 ** function. 2728 */ 2729 PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) 2730 { 2731 char *name; 2732 char *cp; 2733 2734 name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); 2735 if( name==0 ){ 2736 fprintf(stderr,"Can't allocate space for a filename.\n"); 2737 exit(1); 2738 } 2739 strcpy(name,lemp->filename); 2740 cp = strrchr(name,'.'); 2741 if( cp ) *cp = 0; 2742 strcat(name,suffix); 2743 return name; 2744 } 2745 2746 /* Open a file with a name based on the name of the input file, 2747 ** but with a different (specified) suffix, and return a pointer 2748 ** to the stream */ 2749 PRIVATE FILE *file_open( 2750 struct lemon *lemp, 2751 const char *suffix, 2752 const char *mode 2753 ){ 2754 FILE *fp; 2755 2756 if( lemp->outname ) free(lemp->outname); 2757 lemp->outname = file_makename(lemp, suffix); 2758 fp = fopen(lemp->outname,mode); 2759 if( fp==0 && *mode=='w' ){ 2760 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); 2761 lemp->errorcnt++; 2762 return 0; 2763 } 2764 2765 /* Add files we create to a list, so we can delete them if we fail. This 2766 ** is to keep makefiles from getting confused. We don't include .out files, 2767 ** though: this is debug information, and you don't want it deleted if there 2768 ** was an error you need to track down. 2769 */ 2770 if(( *mode=='w' ) && (strcmp(suffix, ".out") != 0)){ 2771 const char **ptr = (const char **) 2772 realloc(made_files, sizeof (const char **) * (made_files_count + 1)); 2773 const char *fname = Strsafe(lemp->outname); 2774 if ((ptr == NULL) || (fname == NULL)) { 2775 free(ptr); 2776 memory_error(); 2777 } 2778 made_files = ptr; 2779 made_files[made_files_count++] = fname; 2780 } 2781 return fp; 2782 } 2783 2784 /* Duplicate the input file without comments and without actions 2785 ** on rules */ 2786 void Reprint(struct lemon *lemp) 2787 { 2788 struct rule *rp; 2789 struct symbol *sp; 2790 int i, j, maxlen, len, ncolumns, skip; 2791 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); 2792 maxlen = 10; 2793 for(i=0; i<lemp->nsymbol; i++){ 2794 sp = lemp->symbols[i]; 2795 len = lemonStrlen(sp->name); 2796 if( len>maxlen ) maxlen = len; 2797 } 2798 ncolumns = 76/(maxlen+5); 2799 if( ncolumns<1 ) ncolumns = 1; 2800 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; 2801 for(i=0; i<skip; i++){ 2802 printf("//"); 2803 for(j=i; j<lemp->nsymbol; j+=skip){ 2804 sp = lemp->symbols[j]; 2805 assert( sp->index==j ); 2806 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); 2807 } 2808 printf("\n"); 2809 } 2810 for(rp=lemp->rule; rp; rp=rp->next){ 2811 printf("%s",rp->lhs->name); 2812 /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ 2813 printf(" ::="); 2814 for(i=0; i<rp->nrhs; i++){ 2815 sp = rp->rhs[i]; 2816 printf(" %s", sp->name); 2817 if( sp->type==MULTITERMINAL ){ 2818 for(j=1; j<sp->nsubsym; j++){ 2819 printf("|%s", sp->subsym[j]->name); 2820 } 2821 } 2822 /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ 2823 } 2824 printf("."); 2825 if( rp->precsym ) printf(" [%s]",rp->precsym->name); 2826 /* if( rp->code ) printf("\n %s",rp->code); */ 2827 printf("\n"); 2828 } 2829 } 2830 2831 void ConfigPrint(FILE *fp, struct config *cfp) 2832 { 2833 struct rule *rp; 2834 struct symbol *sp; 2835 int i, j; 2836 rp = cfp->rp; 2837 fprintf(fp,"%s ::=",rp->lhs->name); 2838 for(i=0; i<=rp->nrhs; i++){ 2839 if( i==cfp->dot ) fprintf(fp," *"); 2840 if( i==rp->nrhs ) break; 2841 sp = rp->rhs[i]; 2842 fprintf(fp," %s", sp->name); 2843 if( sp->type==MULTITERMINAL ){ 2844 for(j=1; j<sp->nsubsym; j++){ 2845 fprintf(fp,"|%s",sp->subsym[j]->name); 2846 } 2847 } 2848 } 2849 } 2850 2851 /* #define TEST */ 2852 #if 0 2853 /* Print a set */ 2854 PRIVATE void SetPrint(out,set,lemp) 2855 FILE *out; 2856 char *set; 2857 struct lemon *lemp; 2858 { 2859 int i; 2860 char *spacer; 2861 spacer = ""; 2862 fprintf(out,"%12s[",""); 2863 for(i=0; i<lemp->nterminal; i++){ 2864 if( SetFind(set,i) ){ 2865 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); 2866 spacer = " "; 2867 } 2868 } 2869 fprintf(out,"]\n"); 2870 } 2871 2872 /* Print a plink chain */ 2873 PRIVATE void PlinkPrint(out,plp,tag) 2874 FILE *out; 2875 struct plink *plp; 2876 char *tag; 2877 { 2878 while( plp ){ 2879 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); 2880 ConfigPrint(out,plp->cfp); 2881 fprintf(out,"\n"); 2882 plp = plp->next; 2883 } 2884 } 2885 #endif 2886 2887 /* Print an action to the given file descriptor. Return FALSE if 2888 ** nothing was actually printed. 2889 */ 2890 int PrintAction(struct action *ap, FILE *fp, int indent){ 2891 int result = 1; 2892 switch( ap->type ){ 2893 case SHIFT: 2894 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); 2895 break; 2896 case REDUCE: 2897 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); 2898 break; 2899 case ACCEPT: 2900 fprintf(fp,"%*s accept",indent,ap->sp->name); 2901 break; 2902 case ERROR: 2903 fprintf(fp,"%*s error",indent,ap->sp->name); 2904 break; 2905 case SRCONFLICT: 2906 case RRCONFLICT: 2907 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", 2908 indent,ap->sp->name,ap->x.rp->index); 2909 break; 2910 case SSCONFLICT: 2911 fprintf(fp,"%*s shift %-3d ** Parsing conflict **", 2912 indent,ap->sp->name,ap->x.stp->statenum); 2913 break; 2914 case SH_RESOLVED: 2915 if( showPrecedenceConflict ){ 2916 fprintf(fp,"%*s shift %-3d -- dropped by precedence", 2917 indent,ap->sp->name,ap->x.stp->statenum); 2918 }else{ 2919 result = 0; 2920 } 2921 break; 2922 case RD_RESOLVED: 2923 if( showPrecedenceConflict ){ 2924 fprintf(fp,"%*s reduce %-3d -- dropped by precedence", 2925 indent,ap->sp->name,ap->x.rp->index); 2926 }else{ 2927 result = 0; 2928 } 2929 break; 2930 case NOT_USED: 2931 result = 0; 2932 break; 2933 } 2934 return result; 2935 } 2936 2937 /* Generate the "y.output" log file */ 2938 void ReportOutput(struct lemon *lemp) 2939 { 2940 int i; 2941 struct state *stp; 2942 struct config *cfp; 2943 struct action *ap; 2944 FILE *fp; 2945 2946 fp = file_open(lemp,".out","wb"); 2947 if( fp==0 ) return; 2948 for(i=0; i<lemp->nstate; i++){ 2949 stp = lemp->sorted[i]; 2950 fprintf(fp,"State %d:\n",stp->statenum); 2951 if( lemp->basisflag ) cfp=stp->bp; 2952 else cfp=stp->cfp; 2953 while( cfp ){ 2954 char buf[20]; 2955 if( cfp->dot==cfp->rp->nrhs ){ 2956 sprintf(buf,"(%d)",cfp->rp->index); 2957 fprintf(fp," %5s ",buf); 2958 }else{ 2959 fprintf(fp," "); 2960 } 2961 ConfigPrint(fp,cfp); 2962 fprintf(fp,"\n"); 2963 #if 0 2964 SetPrint(fp,cfp->fws,lemp); 2965 PlinkPrint(fp,cfp->fplp,"To "); 2966 PlinkPrint(fp,cfp->bplp,"From"); 2967 #endif 2968 if( lemp->basisflag ) cfp=cfp->bp; 2969 else cfp=cfp->next; 2970 } 2971 fprintf(fp,"\n"); 2972 for(ap=stp->ap; ap; ap=ap->next){ 2973 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); 2974 } 2975 fprintf(fp,"\n"); 2976 } 2977 fprintf(fp, "----------------------------------------------------\n"); 2978 fprintf(fp, "Symbols:\n"); 2979 for(i=0; i<lemp->nsymbol; i++){ 2980 int j; 2981 struct symbol *sp; 2982 2983 sp = lemp->symbols[i]; 2984 fprintf(fp, " %3d: %s", i, sp->name); 2985 if( sp->type==NONTERMINAL ){ 2986 fprintf(fp, ":"); 2987 if( sp->lambda ){ 2988 fprintf(fp, " <lambda>"); 2989 } 2990 for(j=0; j<lemp->nterminal; j++){ 2991 if( sp->firstset && SetFind(sp->firstset, j) ){ 2992 fprintf(fp, " %s", lemp->symbols[j]->name); 2993 } 2994 } 2995 } 2996 fprintf(fp, "\n"); 2997 } 2998 fclose(fp); 2999 return; 3000 } 3001 3002 /* Search for the file "name" which is in the same directory as 3003 ** the exacutable */ 3004 PRIVATE char *pathsearch(char *argv0, char *name, int modemask) 3005 { 3006 const char *pathlist; 3007 char *pathbufptr; 3008 char *pathbuf; 3009 char *path,*cp; 3010 char c; 3011 3012 #ifdef __WIN32__ 3013 cp = strrchr(argv0,'\\'); 3014 #else 3015 cp = strrchr(argv0,'/'); 3016 #endif 3017 if( cp ){ 3018 c = *cp; 3019 *cp = 0; 3020 path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); 3021 if( path ) sprintf(path,"%s/%s",argv0,name); 3022 *cp = c; 3023 }else{ 3024 pathlist = getenv("PATH"); 3025 if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; 3026 pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); 3027 path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); 3028 if( (pathbuf != 0) && (path!=0) ){ 3029 pathbufptr = pathbuf; 3030 strcpy(pathbuf, pathlist); 3031 while( *pathbuf ){ 3032 cp = strchr(pathbuf,':'); 3033 if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; 3034 c = *cp; 3035 *cp = 0; 3036 sprintf(path,"%s/%s",pathbuf,name); 3037 *cp = c; 3038 if( c==0 ) pathbuf[0] = 0; 3039 else pathbuf = &cp[1]; 3040 if( access(path,modemask)==0 ) break; 3041 } 3042 free(pathbufptr); 3043 } 3044 } 3045 return path; 3046 } 3047 3048 /* Given an action, compute the integer value for that action 3049 ** which is to be put in the action table of the generated machine. 3050 ** Return negative if no action should be generated. 3051 */ 3052 PRIVATE int compute_action(struct lemon *lemp, struct action *ap) 3053 { 3054 int act; 3055 switch( ap->type ){ 3056 case SHIFT: act = ap->x.stp->statenum; break; 3057 case REDUCE: act = ap->x.rp->index + lemp->nstate; break; 3058 case ERROR: act = lemp->nstate + lemp->nrule; break; 3059 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; 3060 default: act = -1; break; 3061 } 3062 return act; 3063 } 3064 3065 #define LINESIZE 1000 3066 /* The next cluster of routines are for reading the template file 3067 ** and writing the results to the generated parser */ 3068 /* The first function transfers data from "in" to "out" until 3069 ** a line is seen which begins with "%%". The line number is 3070 ** tracked. 3071 ** 3072 ** if name!=0, then any word that begin with "Parse" is changed to 3073 ** begin with *name instead. 3074 */ 3075 PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) 3076 { 3077 int i, iStart; 3078 char line[LINESIZE]; 3079 while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ 3080 (*lineno)++; 3081 iStart = 0; 3082 if( name ){ 3083 for(i=0; line[i]; i++){ 3084 if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 3085 && (i==0 || !isalpha(line[i-1])) 3086 ){ 3087 if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); 3088 fprintf(out,"%s",name); 3089 i += 4; 3090 iStart = i+1; 3091 } 3092 } 3093 } 3094 fprintf(out,"%s",&line[iStart]); 3095 } 3096 } 3097 3098 /* The next function finds the template file and opens it, returning 3099 ** a pointer to the opened file. */ 3100 PRIVATE FILE *tplt_open(struct lemon *lemp) 3101 { 3102 static char templatename[] = "lempar.c"; 3103 char buf[1000]; 3104 FILE *in; 3105 char *tpltname; 3106 char *cp; 3107 3108 /* first, see if user specified a template filename on the command line. */ 3109 if (user_templatename != 0) { 3110 if( access(user_templatename,004)==-1 ){ 3111 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3112 user_templatename); 3113 lemp->errorcnt++; 3114 return 0; 3115 } 3116 in = fopen(user_templatename,"rb"); 3117 if( in==0 ){ 3118 fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename); 3119 lemp->errorcnt++; 3120 return 0; 3121 } 3122 return in; 3123 } 3124 3125 cp = strrchr(lemp->filename,'.'); 3126 if( cp ){ 3127 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); 3128 }else{ 3129 sprintf(buf,"%s.lt",lemp->filename); 3130 } 3131 if( access(buf,004)==0 ){ 3132 tpltname = buf; 3133 }else if( access(templatename,004)==0 ){ 3134 tpltname = templatename; 3135 }else{ 3136 tpltname = pathsearch(lemp->argv0,templatename,0); 3137 } 3138 if( tpltname==0 ){ 3139 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", 3140 templatename); 3141 lemp->errorcnt++; 3142 return 0; 3143 } 3144 in = fopen(tpltname,"rb"); 3145 if( in==0 ){ 3146 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); 3147 lemp->errorcnt++; 3148 return 0; 3149 } 3150 return in; 3151 } 3152 3153 /* Print a #line directive line to the output file. */ 3154 PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) 3155 { 3156 fprintf(out,"#line %d \"",lineno); 3157 while( *filename ){ 3158 if( *filename == '\\' ) putc('\\',out); 3159 putc(*filename,out); 3160 filename++; 3161 } 3162 fprintf(out,"\"\n"); 3163 } 3164 3165 /* Print a string to the file and keep the linenumber up to date */ 3166 PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) 3167 { 3168 if( str==0 ) return; 3169 while( *str ){ 3170 putc(*str,out); 3171 if( *str=='\n' ) (*lineno)++; 3172 str++; 3173 } 3174 if( str[-1]!='\n' ){ 3175 putc('\n',out); 3176 (*lineno)++; 3177 } 3178 if (!lemp->nolinenosflag) { 3179 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); 3180 } 3181 return; 3182 } 3183 3184 /* 3185 ** The following routine emits code for the destructor for the 3186 ** symbol sp 3187 */ 3188 void emit_destructor_code( 3189 FILE *out, 3190 struct symbol *sp, 3191 struct lemon *lemp, 3192 int *lineno 3193 ){ 3194 char *cp = 0; 3195 3196 if( sp->type==TERMINAL ){ 3197 cp = lemp->tokendest; 3198 if( cp==0 ) return; 3199 fprintf(out,"{\n"); (*lineno)++; 3200 }else if( sp->destructor ){ 3201 cp = sp->destructor; 3202 fprintf(out,"{\n"); (*lineno)++; 3203 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } 3204 }else if( lemp->vardest ){ 3205 cp = lemp->vardest; 3206 if( cp==0 ) return; 3207 fprintf(out,"{\n"); (*lineno)++; 3208 }else{ 3209 assert( 0 ); /* Cannot happen */ 3210 } 3211 for(; *cp; cp++){ 3212 if( *cp=='$' && cp[1]=='$' ){ 3213 fprintf(out,"(yypminor->yy%d)",sp->dtnum); 3214 cp++; 3215 continue; 3216 } 3217 if( *cp=='\n' ) (*lineno)++; 3218 fputc(*cp,out); 3219 } 3220 fprintf(out,"\n"); (*lineno)++; 3221 if (!lemp->nolinenosflag) { 3222 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); 3223 } 3224 fprintf(out,"}\n"); (*lineno)++; 3225 return; 3226 } 3227 3228 /* 3229 ** Return TRUE (non-zero) if the given symbol has a destructor. 3230 */ 3231 int has_destructor(struct symbol *sp, struct lemon *lemp) 3232 { 3233 int ret; 3234 if( sp->type==TERMINAL ){ 3235 ret = lemp->tokendest!=0; 3236 }else{ 3237 ret = lemp->vardest!=0 || sp->destructor!=0; 3238 } 3239 return ret; 3240 } 3241 3242 /* 3243 ** Append text to a dynamically allocated string. If zText is 0 then 3244 ** reset the string to be empty again. Always return the complete text 3245 ** of the string (which is overwritten with each call). 3246 ** 3247 ** n bytes of zText are stored. If n==0 then all of zText up to the first 3248 ** \000 terminator is stored. zText can contain up to two instances of 3249 ** %d. The values of p1 and p2 are written into the first and second 3250 ** %d. 3251 ** 3252 ** If n==-1, then the previous character is overwritten. 3253 */ 3254 PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ 3255 static char empty[1] = { 0 }; 3256 static char *z = 0; 3257 static int alloced = 0; 3258 static int used = 0; 3259 int c; 3260 char zInt[40]; 3261 if( zText==0 ){ 3262 used = 0; 3263 return z; 3264 } 3265 if( n<=0 ){ 3266 if( n<0 ){ 3267 used += n; 3268 assert( used>=0 ); 3269 } 3270 n = lemonStrlen(zText); 3271 } 3272 if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ 3273 alloced = n + sizeof(zInt)*2 + used + 200; 3274 z = (char *) realloc(z, alloced); 3275 } 3276 if( z==0 ) return empty; 3277 while( n-- > 0 ){ 3278 c = *(zText++); 3279 if( c=='%' && n>0 && zText[0]=='d' ){ 3280 sprintf(zInt, "%d", p1); 3281 p1 = p2; 3282 strcpy(&z[used], zInt); 3283 used += lemonStrlen(&z[used]); 3284 zText++; 3285 n--; 3286 }else{ 3287 z[used++] = c; 3288 } 3289 } 3290 z[used] = 0; 3291 return z; 3292 } 3293 3294 /* 3295 ** zCode is a string that is the action associated with a rule. Expand 3296 ** the symbols in this string so that the refer to elements of the parser 3297 ** stack. 3298 */ 3299 PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){ 3300 char *cp, *xp; 3301 int i; 3302 char lhsused = 0; /* True if the LHS element has been used */ 3303 char used[MAXRHS]; /* True for each RHS element which is used */ 3304 3305 for(i=0; i<rp->nrhs; i++) used[i] = 0; 3306 lhsused = 0; 3307 3308 if( rp->code==0 ){ 3309 static char newlinestr[2] = { '\n', '\0' }; 3310 rp->code = newlinestr; 3311 rp->line = rp->ruleline; 3312 } 3313 3314 append_str(0,0,0,0); 3315 3316 /* This const cast is wrong but harmless, if we're careful. */ 3317 for(cp=(char *)rp->code; *cp; cp++){ 3318 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ 3319 char saved; 3320 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); 3321 saved = *xp; 3322 *xp = 0; 3323 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ 3324 append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0); 3325 cp = xp; 3326 lhsused = 1; 3327 }else{ 3328 for(i=0; i<rp->nrhs; i++){ 3329 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ 3330 if( cp!=rp->code && cp[-1]=='@' ){ 3331 /* If the argument is of the form @X then substituted 3332 ** the token number of X, not the value of X */ 3333 append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); 3334 }else{ 3335 struct symbol *sp = rp->rhs[i]; 3336 int dtnum; 3337 if( sp->type==MULTITERMINAL ){ 3338 dtnum = sp->subsym[0]->dtnum; 3339 }else{ 3340 dtnum = sp->dtnum; 3341 } 3342 append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); 3343 } 3344 cp = xp; 3345 used[i] = 1; 3346 break; 3347 } 3348 } 3349 } 3350 *xp = saved; 3351 } 3352 append_str(cp, 1, 0, 0); 3353 } /* End loop */ 3354 3355 /* Check to make sure the LHS has been used */ 3356 if( rp->lhsalias && !lhsused ){ 3357 ErrorMsg(lemp->filename,rp->ruleline, 3358 "Label \"%s\" for \"%s(%s)\" is never used.", 3359 rp->lhsalias,rp->lhs->name,rp->lhsalias); 3360 lemp->errorcnt++; 3361 } 3362 3363 /* Generate destructor code for RHS symbols which are not used in the 3364 ** reduce code */ 3365 for(i=0; i<rp->nrhs; i++){ 3366 if( rp->rhsalias[i] && !used[i] ){ 3367 ErrorMsg(lemp->filename,rp->ruleline, 3368 "Label %s for \"%s(%s)\" is never used.", 3369 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); 3370 lemp->errorcnt++; 3371 }else if( rp->rhsalias[i]==0 ){ 3372 if( has_destructor(rp->rhs[i],lemp) ){ 3373 append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, 3374 rp->rhs[i]->index,i-rp->nrhs+1); 3375 }else{ 3376 /* No destructor defined for this term */ 3377 } 3378 } 3379 } 3380 if( rp->code ){ 3381 cp = append_str(0,0,0,0); 3382 rp->code = Strsafe(cp?cp:""); 3383 } 3384 } 3385 3386 /* 3387 ** Generate code which executes when the rule "rp" is reduced. Write 3388 ** the code to "out". Make sure lineno stays up-to-date. 3389 */ 3390 PRIVATE void emit_code( 3391 FILE *out, 3392 struct rule *rp, 3393 struct lemon *lemp, 3394 int *lineno 3395 ){ 3396 const char *cp; 3397 3398 /* Generate code to do the reduce action */ 3399 if( rp->code ){ 3400 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } 3401 fprintf(out,"{%s",rp->code); 3402 for(cp=rp->code; *cp; cp++){ 3403 if( *cp=='\n' ) (*lineno)++; 3404 } /* End loop */ 3405 fprintf(out,"}\n"); (*lineno)++; 3406 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } 3407 } /* End if( rp->code ) */ 3408 3409 return; 3410 } 3411 3412 /* 3413 ** Print the definition of the union used for the parser's data stack. 3414 ** This union contains fields for every possible data type for tokens 3415 ** and nonterminals. In the process of computing and printing this 3416 ** union, also set the ".dtnum" field of every terminal and nonterminal 3417 ** symbol. 3418 */ 3419 void print_stack_union( 3420 FILE *out, /* The output stream */ 3421 struct lemon *lemp, /* The main info structure for this parser */ 3422 int *plineno, /* Pointer to the line number */ 3423 int mhflag /* True if generating makeheaders output */ 3424 ){ 3425 int lineno = *plineno; /* The line number of the output */ 3426 char **types; /* A hash table of datatypes */ 3427 int arraysize; /* Size of the "types" array */ 3428 int maxdtlength; /* Maximum length of any ".datatype" field. */ 3429 char *stddt; /* Standardized name for a datatype */ 3430 int i,j; /* Loop counters */ 3431 int hash; /* For hashing the name of a type */ 3432 const char *name; /* Name of the parser */ 3433 3434 /* Allocate and initialize types[] and allocate stddt[] */ 3435 arraysize = lemp->nsymbol * 2; 3436 types = (char**)calloc( arraysize, sizeof(char*) ); 3437 for(i=0; i<arraysize; i++) types[i] = 0; 3438 maxdtlength = 0; 3439 if( lemp->vartype ){ 3440 maxdtlength = lemonStrlen(lemp->vartype); 3441 } 3442 for(i=0; i<lemp->nsymbol; i++){ 3443 int len; 3444 struct symbol *sp = lemp->symbols[i]; 3445 if( sp->datatype==0 ) continue; 3446 len = lemonStrlen(sp->datatype); 3447 if( len>maxdtlength ) maxdtlength = len; 3448 } 3449 stddt = (char*)malloc( maxdtlength*2 + 1 ); 3450 if( types==0 || stddt==0 ){ 3451 fprintf(stderr,"Out of memory.\n"); 3452 exit(1); 3453 } 3454 3455 /* Build a hash table of datatypes. The ".dtnum" field of each symbol 3456 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is 3457 ** used for terminal symbols. If there is no %default_type defined then 3458 ** 0 is also used as the .dtnum value for nonterminals which do not specify 3459 ** a datatype using the %type directive. 3460 */ 3461 for(i=0; i<lemp->nsymbol; i++){ 3462 struct symbol *sp = lemp->symbols[i]; 3463 char *cp; 3464 if( sp==lemp->errsym ){ 3465 sp->dtnum = arraysize+1; 3466 continue; 3467 } 3468 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ 3469 sp->dtnum = 0; 3470 continue; 3471 } 3472 cp = sp->datatype; 3473 if( cp==0 ) cp = lemp->vartype; 3474 j = 0; 3475 while( isspace(*cp) ) cp++; 3476 while( *cp ) stddt[j++] = *cp++; 3477 while( j>0 && isspace(stddt[j-1]) ) j--; 3478 stddt[j] = 0; 3479 if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ 3480 sp->dtnum = 0; 3481 continue; 3482 } 3483 hash = 0; 3484 for(j=0; stddt[j]; j++){ 3485 hash = hash*53 + stddt[j]; 3486 } 3487 hash = (hash & 0x7fffffff)%arraysize; 3488 while( types[hash] ){ 3489 if( strcmp(types[hash],stddt)==0 ){ 3490 sp->dtnum = hash + 1; 3491 break; 3492 } 3493 hash++; 3494 if( hash>=arraysize ) hash = 0; 3495 } 3496 if( types[hash]==0 ){ 3497 sp->dtnum = hash + 1; 3498 types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); 3499 if( types[hash]==0 ){ 3500 fprintf(stderr,"Out of memory.\n"); 3501 exit(1); 3502 } 3503 strcpy(types[hash],stddt); 3504 } 3505 } 3506 3507 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ 3508 name = lemp->name ? lemp->name : "Parse"; 3509 lineno = *plineno; 3510 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } 3511 fprintf(out,"#define %sTOKENTYPE %s\n",name, 3512 lemp->tokentype?lemp->tokentype:"void*"); lineno++; 3513 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } 3514 fprintf(out,"typedef union {\n"); lineno++; 3515 fprintf(out," int yyinit;\n"); lineno++; 3516 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; 3517 for(i=0; i<arraysize; i++){ 3518 if( types[i]==0 ) continue; 3519 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; 3520 free(types[i]); 3521 } 3522 if( lemp->errsym->useCnt ){ 3523 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; 3524 } 3525 free(stddt); 3526 free(types); 3527 fprintf(out,"} YYMINORTYPE;\n"); lineno++; 3528 *plineno = lineno; 3529 } 3530 3531 /* 3532 ** Return the name of a C datatype able to represent values between 3533 ** lwr and upr, inclusive. 3534 */ 3535 static const char *minimum_size_type(int lwr, int upr){ 3536 if( lwr>=0 ){ 3537 if( upr<=255 ){ 3538 return "unsigned char"; 3539 }else if( upr<65535 ){ 3540 return "unsigned short int"; 3541 }else{ 3542 return "unsigned int"; 3543 } 3544 }else if( lwr>=-127 && upr<=127 ){ 3545 return "signed char"; 3546 }else if( lwr>=-32767 && upr<32767 ){ 3547 return "short"; 3548 }else{ 3549 return "int"; 3550 } 3551 } 3552 3553 /* 3554 ** Each state contains a set of token transaction and a set of 3555 ** nonterminal transactions. Each of these sets makes an instance 3556 ** of the following structure. An array of these structures is used 3557 ** to order the creation of entries in the yy_action[] table. 3558 */ 3559 struct axset { 3560 struct state *stp; /* A pointer to a state */ 3561 int isTkn; /* True to use tokens. False for non-terminals */ 3562 int nAction; /* Number of actions */ 3563 int iOrder; /* Original order of action sets */ 3564 }; 3565 3566 /* 3567 ** Compare to axset structures for sorting purposes 3568 */ 3569 static int axset_compare(const void *a, const void *b){ 3570 struct axset *p1 = (struct axset*)a; 3571 struct axset *p2 = (struct axset*)b; 3572 int c; 3573 c = p2->nAction - p1->nAction; 3574 if( c==0 ){ 3575 c = p2->iOrder - p1->iOrder; 3576 } 3577 assert( c!=0 || p1==p2 ); 3578 return c; 3579 } 3580 3581 /* 3582 ** Write text on "out" that describes the rule "rp". 3583 */ 3584 static void writeRuleText(FILE *out, struct rule *rp){ 3585 int j; 3586 fprintf(out,"%s ::=", rp->lhs->name); 3587 for(j=0; j<rp->nrhs; j++){ 3588 struct symbol *sp = rp->rhs[j]; 3589 fprintf(out," %s", sp->name); 3590 if( sp->type==MULTITERMINAL ){ 3591 int k; 3592 for(k=1; k<sp->nsubsym; k++){ 3593 fprintf(out,"|%s",sp->subsym[k]->name); 3594 } 3595 } 3596 } 3597 } 3598 3599 3600 /* Generate C source code for the parser */ 3601 void ReportTable( 3602 struct lemon *lemp, 3603 int mhflag /* Output in makeheaders format if true */ 3604 ){ 3605 FILE *out, *in; 3606 char line[LINESIZE]; 3607 int lineno; 3608 struct state *stp; 3609 struct action *ap; 3610 struct rule *rp; 3611 struct acttab *pActtab; 3612 int i, j, n; 3613 const char *name; 3614 int mnTknOfst, mxTknOfst; 3615 int mnNtOfst, mxNtOfst; 3616 struct axset *ax; 3617 3618 in = tplt_open(lemp); 3619 if( in==0 ) return; 3620 out = file_open(lemp,".c","wb"); 3621 if( out==0 ){ 3622 fclose(in); 3623 return; 3624 } 3625 lineno = 1; 3626 tplt_xfer(lemp->name,in,out,&lineno); 3627 3628 /* Generate the include code, if any */ 3629 tplt_print(out,lemp,lemp->include,&lineno); 3630 if( mhflag ){ 3631 char *name = file_makename(lemp, ".h"); 3632 fprintf(out,"#include \"%s\"\n", name); lineno++; 3633 free(name); 3634 } 3635 tplt_xfer(lemp->name,in,out,&lineno); 3636 3637 /* Generate #defines for all tokens */ 3638 if( mhflag ){ 3639 const char *prefix; 3640 fprintf(out,"#if INTERFACE\n"); lineno++; 3641 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 3642 else prefix = ""; 3643 for(i=1; i<lemp->nterminal; i++){ 3644 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 3645 lineno++; 3646 } 3647 fprintf(out,"#endif\n"); lineno++; 3648 } 3649 tplt_xfer(lemp->name,in,out,&lineno); 3650 3651 /* Generate the defines */ 3652 fprintf(out,"#define YYCODETYPE %s\n", 3653 minimum_size_type(0, lemp->nsymbol+1)); lineno++; 3654 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; 3655 fprintf(out,"#define YYACTIONTYPE %s\n", 3656 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; 3657 if( lemp->wildcard ){ 3658 fprintf(out,"#define YYWILDCARD %d\n", 3659 lemp->wildcard->index); lineno++; 3660 } 3661 print_stack_union(out,lemp,&lineno,mhflag); 3662 fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; 3663 if( lemp->stacksize ){ 3664 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; 3665 }else{ 3666 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; 3667 } 3668 fprintf(out, "#endif\n"); lineno++; 3669 if( mhflag ){ 3670 fprintf(out,"#if INTERFACE\n"); lineno++; 3671 } 3672 name = lemp->name ? lemp->name : "Parse"; 3673 if( lemp->arg && lemp->arg[0] ){ 3674 int i; 3675 i = lemonStrlen(lemp->arg); 3676 while( i>=1 && isspace(lemp->arg[i-1]) ) i--; 3677 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; 3678 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; 3679 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; 3680 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", 3681 name,lemp->arg,&lemp->arg[i]); lineno++; 3682 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", 3683 name,&lemp->arg[i],&lemp->arg[i]); lineno++; 3684 }else{ 3685 fprintf(out,"#define %sARG_SDECL\n",name); lineno++; 3686 fprintf(out,"#define %sARG_PDECL\n",name); lineno++; 3687 fprintf(out,"#define %sARG_FETCH\n",name); lineno++; 3688 fprintf(out,"#define %sARG_STORE\n",name); lineno++; 3689 } 3690 if( mhflag ){ 3691 fprintf(out,"#endif\n"); lineno++; 3692 } 3693 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; 3694 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; 3695 if( lemp->errsym->useCnt ){ 3696 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; 3697 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; 3698 } 3699 if( lemp->has_fallback ){ 3700 fprintf(out,"#define YYFALLBACK 1\n"); lineno++; 3701 } 3702 tplt_xfer(lemp->name,in,out,&lineno); 3703 3704 /* Generate the action table and its associates: 3705 ** 3706 ** yy_action[] A single table containing all actions. 3707 ** yy_lookahead[] A table containing the lookahead for each entry in 3708 ** yy_action. Used to detect hash collisions. 3709 ** yy_shift_ofst[] For each state, the offset into yy_action for 3710 ** shifting terminals. 3711 ** yy_reduce_ofst[] For each state, the offset into yy_action for 3712 ** shifting non-terminals after a reduce. 3713 ** yy_default[] Default action for each state. 3714 */ 3715 3716 /* Compute the actions on all states and count them up */ 3717 ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0])); 3718 if( ax==0 ){ 3719 fprintf(stderr,"malloc failed\n"); 3720 exit(1); 3721 } 3722 for(i=0; i<lemp->nstate; i++){ 3723 stp = lemp->sorted[i]; 3724 ax[i*2].stp = stp; 3725 ax[i*2].isTkn = 1; 3726 ax[i*2].nAction = stp->nTknAct; 3727 ax[i*2+1].stp = stp; 3728 ax[i*2+1].isTkn = 0; 3729 ax[i*2+1].nAction = stp->nNtAct; 3730 } 3731 mxTknOfst = mnTknOfst = 0; 3732 mxNtOfst = mnNtOfst = 0; 3733 3734 /* Compute the action table. In order to try to keep the size of the 3735 ** action table to a minimum, the heuristic of placing the largest action 3736 ** sets first is used. 3737 */ 3738 for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i; 3739 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); 3740 pActtab = acttab_alloc(); 3741 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){ 3742 stp = ax[i].stp; 3743 if( ax[i].isTkn ){ 3744 for(ap=stp->ap; ap; ap=ap->next){ 3745 int action; 3746 if( ap->sp->index>=lemp->nterminal ) continue; 3747 action = compute_action(lemp, ap); 3748 if( action<0 ) continue; 3749 acttab_action(pActtab, ap->sp->index, action); 3750 } 3751 stp->iTknOfst = acttab_insert(pActtab); 3752 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; 3753 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; 3754 }else{ 3755 for(ap=stp->ap; ap; ap=ap->next){ 3756 int action; 3757 if( ap->sp->index<lemp->nterminal ) continue; 3758 if( ap->sp->index==lemp->nsymbol ) continue; 3759 action = compute_action(lemp, ap); 3760 if( action<0 ) continue; 3761 acttab_action(pActtab, ap->sp->index, action); 3762 } 3763 stp->iNtOfst = acttab_insert(pActtab); 3764 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; 3765 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; 3766 } 3767 } 3768 free(ax); 3769 3770 /* Output the yy_action table */ 3771 n = acttab_size(pActtab); 3772 fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; 3773 fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; 3774 for(i=j=0; i<n; i++){ 3775 int action = acttab_yyaction(pActtab, i); 3776 if( action<0 ) action = lemp->nstate + lemp->nrule + 2; 3777 if( j==0 ) fprintf(out," /* %5d */ ", i); 3778 fprintf(out, " %4d,", action); 3779 if( j==9 || i==n-1 ){ 3780 fprintf(out, "\n"); lineno++; 3781 j = 0; 3782 }else{ 3783 j++; 3784 } 3785 } 3786 fprintf(out, "};\n"); lineno++; 3787 3788 /* Output the yy_lookahead table */ 3789 fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; 3790 for(i=j=0; i<n; i++){ 3791 int la = acttab_yylookahead(pActtab, i); 3792 if( la<0 ) la = lemp->nsymbol; 3793 if( j==0 ) fprintf(out," /* %5d */ ", i); 3794 fprintf(out, " %4d,", la); 3795 if( j==9 || i==n-1 ){ 3796 fprintf(out, "\n"); lineno++; 3797 j = 0; 3798 }else{ 3799 j++; 3800 } 3801 } 3802 fprintf(out, "};\n"); lineno++; 3803 3804 /* Output the yy_shift_ofst[] table */ 3805 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; 3806 n = lemp->nstate; 3807 while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; 3808 fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; 3809 fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; 3810 fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; 3811 fprintf(out, "static const %s yy_shift_ofst[] = {\n", 3812 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; 3813 for(i=j=0; i<n; i++){ 3814 int ofst; 3815 stp = lemp->sorted[i]; 3816 ofst = stp->iTknOfst; 3817 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; 3818 if( j==0 ) fprintf(out," /* %5d */ ", i); 3819 fprintf(out, " %4d,", ofst); 3820 if( j==9 || i==n-1 ){ 3821 fprintf(out, "\n"); lineno++; 3822 j = 0; 3823 }else{ 3824 j++; 3825 } 3826 } 3827 fprintf(out, "};\n"); lineno++; 3828 3829 /* Output the yy_reduce_ofst[] table */ 3830 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; 3831 n = lemp->nstate; 3832 while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; 3833 fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; 3834 fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; 3835 fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; 3836 fprintf(out, "static const %s yy_reduce_ofst[] = {\n", 3837 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; 3838 for(i=j=0; i<n; i++){ 3839 int ofst; 3840 stp = lemp->sorted[i]; 3841 ofst = stp->iNtOfst; 3842 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; 3843 if( j==0 ) fprintf(out," /* %5d */ ", i); 3844 fprintf(out, " %4d,", ofst); 3845 if( j==9 || i==n-1 ){ 3846 fprintf(out, "\n"); lineno++; 3847 j = 0; 3848 }else{ 3849 j++; 3850 } 3851 } 3852 fprintf(out, "};\n"); lineno++; 3853 3854 /* Output the default action table */ 3855 fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; 3856 n = lemp->nstate; 3857 for(i=j=0; i<n; i++){ 3858 stp = lemp->sorted[i]; 3859 if( j==0 ) fprintf(out," /* %5d */ ", i); 3860 fprintf(out, " %4d,", stp->iDflt); 3861 if( j==9 || i==n-1 ){ 3862 fprintf(out, "\n"); lineno++; 3863 j = 0; 3864 }else{ 3865 j++; 3866 } 3867 } 3868 fprintf(out, "};\n"); lineno++; 3869 tplt_xfer(lemp->name,in,out,&lineno); 3870 3871 /* Generate the table of fallback tokens. 3872 */ 3873 if( lemp->has_fallback ){ 3874 int mx = lemp->nterminal - 1; 3875 while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } 3876 for(i=0; i<=mx; i++){ 3877 struct symbol *p = lemp->symbols[i]; 3878 if( p->fallback==0 ){ 3879 fprintf(out, " 0, /* %10s => nothing */\n", p->name); 3880 }else{ 3881 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, 3882 p->name, p->fallback->name); 3883 } 3884 lineno++; 3885 } 3886 } 3887 tplt_xfer(lemp->name, in, out, &lineno); 3888 3889 /* Generate a table containing the symbolic name of every symbol 3890 */ 3891 for(i=0; i<lemp->nsymbol; i++){ 3892 sprintf(line,"\"%s\",",lemp->symbols[i]->name); 3893 fprintf(out," %-15s",line); 3894 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } 3895 } 3896 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } 3897 tplt_xfer(lemp->name,in,out,&lineno); 3898 3899 /* Generate a table containing a text string that describes every 3900 ** rule in the rule set of the grammar. This information is used 3901 ** when tracing REDUCE actions. 3902 */ 3903 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ 3904 assert( rp->index==i ); 3905 fprintf(out," /* %3d */ \"", i); 3906 writeRuleText(out, rp); 3907 fprintf(out,"\",\n"); lineno++; 3908 } 3909 tplt_xfer(lemp->name,in,out,&lineno); 3910 3911 /* Generate code which executes every time a symbol is popped from 3912 ** the stack while processing errors or while destroying the parser. 3913 ** (In other words, generate the %destructor actions) 3914 */ 3915 if( lemp->tokendest ){ 3916 int once = 1; 3917 for(i=0; i<lemp->nsymbol; i++){ 3918 struct symbol *sp = lemp->symbols[i]; 3919 if( sp==0 || sp->type!=TERMINAL ) continue; 3920 if( once ){ 3921 fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; 3922 once = 0; 3923 } 3924 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3925 } 3926 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); 3927 if( i<lemp->nsymbol ){ 3928 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3929 fprintf(out," break;\n"); lineno++; 3930 } 3931 } 3932 if( lemp->vardest ){ 3933 struct symbol *dflt_sp = 0; 3934 int once = 1; 3935 for(i=0; i<lemp->nsymbol; i++){ 3936 struct symbol *sp = lemp->symbols[i]; 3937 if( sp==0 || sp->type==TERMINAL || 3938 sp->index<=0 || sp->destructor!=0 ) continue; 3939 if( once ){ 3940 fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; 3941 once = 0; 3942 } 3943 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3944 dflt_sp = sp; 3945 } 3946 if( dflt_sp!=0 ){ 3947 emit_destructor_code(out,dflt_sp,lemp,&lineno); 3948 } 3949 fprintf(out," break;\n"); lineno++; 3950 } 3951 for(i=0; i<lemp->nsymbol; i++){ 3952 struct symbol *sp = lemp->symbols[i]; 3953 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; 3954 fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; 3955 3956 /* Combine duplicate destructors into a single case */ 3957 for(j=i+1; j<lemp->nsymbol; j++){ 3958 struct symbol *sp2 = lemp->symbols[j]; 3959 if( sp2 && sp2->type!=TERMINAL && sp2->destructor 3960 && sp2->dtnum==sp->dtnum 3961 && strcmp(sp->destructor,sp2->destructor)==0 ){ 3962 fprintf(out," case %d: /* %s */\n", 3963 sp2->index, sp2->name); lineno++; 3964 sp2->destructor = 0; 3965 } 3966 } 3967 3968 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); 3969 fprintf(out," break;\n"); lineno++; 3970 } 3971 tplt_xfer(lemp->name,in,out,&lineno); 3972 3973 /* Generate code which executes whenever the parser stack overflows */ 3974 tplt_print(out,lemp,lemp->overflow,&lineno); 3975 tplt_xfer(lemp->name,in,out,&lineno); 3976 3977 /* Generate the table of rule information 3978 ** 3979 ** Note: This code depends on the fact that rules are number 3980 ** sequentually beginning with 0. 3981 */ 3982 for(rp=lemp->rule; rp; rp=rp->next){ 3983 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; 3984 } 3985 tplt_xfer(lemp->name,in,out,&lineno); 3986 3987 /* Generate code which execution during each REDUCE action */ 3988 for(rp=lemp->rule; rp; rp=rp->next){ 3989 translate_code(lemp, rp); 3990 } 3991 /* First output rules other than the default: rule */ 3992 for(rp=lemp->rule; rp; rp=rp->next){ 3993 struct rule *rp2; /* Other rules with the same action */ 3994 if( rp->code==0 ) continue; 3995 if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ 3996 fprintf(out," case %d: /* ", rp->index); 3997 writeRuleText(out, rp); 3998 fprintf(out, " */\n"); lineno++; 3999 for(rp2=rp->next; rp2; rp2=rp2->next){ 4000 if( rp2->code==rp->code ){ 4001 fprintf(out," case %d: /* ", rp2->index); 4002 writeRuleText(out, rp2); 4003 fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++; 4004 rp2->code = 0; 4005 } 4006 } 4007 emit_code(out,rp,lemp,&lineno); 4008 fprintf(out," break;\n"); lineno++; 4009 rp->code = 0; 4010 } 4011 /* Finally, output the default: rule. We choose as the default: all 4012 ** empty actions. */ 4013 fprintf(out," default:\n"); lineno++; 4014 for(rp=lemp->rule; rp; rp=rp->next){ 4015 if( rp->code==0 ) continue; 4016 assert( rp->code[0]=='\n' && rp->code[1]==0 ); 4017 fprintf(out," /* (%d) ", rp->index); 4018 writeRuleText(out, rp); 4019 fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++; 4020 } 4021 fprintf(out," break;\n"); lineno++; 4022 tplt_xfer(lemp->name,in,out,&lineno); 4023 4024 /* Generate code which executes if a parse fails */ 4025 tplt_print(out,lemp,lemp->failure,&lineno); 4026 tplt_xfer(lemp->name,in,out,&lineno); 4027 4028 /* Generate code which executes when a syntax error occurs */ 4029 tplt_print(out,lemp,lemp->error,&lineno); 4030 tplt_xfer(lemp->name,in,out,&lineno); 4031 4032 /* Generate code which executes when the parser accepts its input */ 4033 tplt_print(out,lemp,lemp->accept,&lineno); 4034 tplt_xfer(lemp->name,in,out,&lineno); 4035 4036 /* Append any addition code the user desires */ 4037 tplt_print(out,lemp,lemp->extracode,&lineno); 4038 4039 fclose(in); 4040 fclose(out); 4041 return; 4042 } 4043 4044 /* Generate a header file for the parser */ 4045 void ReportHeader(struct lemon *lemp) 4046 { 4047 FILE *out, *in; 4048 const char *prefix; 4049 char line[LINESIZE]; 4050 char pattern[LINESIZE]; 4051 int i; 4052 4053 if( lemp->tokenprefix ) prefix = lemp->tokenprefix; 4054 else prefix = ""; 4055 in = file_open(lemp,".h","rb"); 4056 if( in ){ 4057 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ 4058 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 4059 if( strcmp(line,pattern) ) break; 4060 } 4061 fclose(in); 4062 if( i==lemp->nterminal ){ 4063 /* No change in the file. Don't rewrite it. */ 4064 return; 4065 } 4066 } 4067 out = file_open(lemp,".h","wb"); 4068 if( out ){ 4069 for(i=1; i<lemp->nterminal; i++){ 4070 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); 4071 } 4072 fclose(out); 4073 } 4074 return; 4075 } 4076 4077 /* Reduce the size of the action tables, if possible, by making use 4078 ** of defaults. 4079 ** 4080 ** In this version, we take the most frequent REDUCE action and make 4081 ** it the default. Except, there is no default if the wildcard token 4082 ** is a possible look-ahead. 4083 */ 4084 void CompressTables(struct lemon *lemp) 4085 { 4086 struct state *stp; 4087 struct action *ap, *ap2; 4088 struct rule *rp, *rp2, *rbest; 4089 int nbest, n; 4090 int i; 4091 int usesWildcard; 4092 4093 for(i=0; i<lemp->nstate; i++){ 4094 stp = lemp->sorted[i]; 4095 nbest = 0; 4096 rbest = 0; 4097 usesWildcard = 0; 4098 4099 for(ap=stp->ap; ap; ap=ap->next){ 4100 if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ 4101 usesWildcard = 1; 4102 } 4103 if( ap->type!=REDUCE ) continue; 4104 rp = ap->x.rp; 4105 if( rp->lhsStart ) continue; 4106 if( rp==rbest ) continue; 4107 n = 1; 4108 for(ap2=ap->next; ap2; ap2=ap2->next){ 4109 if( ap2->type!=REDUCE ) continue; 4110 rp2 = ap2->x.rp; 4111 if( rp2==rbest ) continue; 4112 if( rp2==rp ) n++; 4113 } 4114 if( n>nbest ){ 4115 nbest = n; 4116 rbest = rp; 4117 } 4118 } 4119 4120 /* Do not make a default if the number of rules to default 4121 ** is not at least 1 or if the wildcard token is a possible 4122 ** lookahead. 4123 */ 4124 if( nbest<1 || usesWildcard ) continue; 4125 4126 4127 /* Combine matching REDUCE actions into a single default */ 4128 for(ap=stp->ap; ap; ap=ap->next){ 4129 if( ap->type==REDUCE && ap->x.rp==rbest ) break; 4130 } 4131 assert( ap ); 4132 ap->sp = Symbol_new("{default}"); 4133 for(ap=ap->next; ap; ap=ap->next){ 4134 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; 4135 } 4136 stp->ap = Action_sort(stp->ap); 4137 } 4138 } 4139 4140 4141 /* 4142 ** Compare two states for sorting purposes. The smaller state is the 4143 ** one with the most non-terminal actions. If they have the same number 4144 ** of non-terminal actions, then the smaller is the one with the most 4145 ** token actions. 4146 */ 4147 static int stateResortCompare(const void *a, const void *b){ 4148 const struct state *pA = *(const struct state**)a; 4149 const struct state *pB = *(const struct state**)b; 4150 int n; 4151 4152 n = pB->nNtAct - pA->nNtAct; 4153 if( n==0 ){ 4154 n = pB->nTknAct - pA->nTknAct; 4155 if( n==0 ){ 4156 n = pB->statenum - pA->statenum; 4157 } 4158 } 4159 assert( n!=0 ); 4160 return n; 4161 } 4162 4163 4164 /* 4165 ** Renumber and resort states so that states with fewer choices 4166 ** occur at the end. Except, keep state 0 as the first state. 4167 */ 4168 void ResortStates(struct lemon *lemp) 4169 { 4170 int i; 4171 struct state *stp; 4172 struct action *ap; 4173 4174 for(i=0; i<lemp->nstate; i++){ 4175 stp = lemp->sorted[i]; 4176 stp->nTknAct = stp->nNtAct = 0; 4177 stp->iDflt = lemp->nstate + lemp->nrule; 4178 stp->iTknOfst = NO_OFFSET; 4179 stp->iNtOfst = NO_OFFSET; 4180 for(ap=stp->ap; ap; ap=ap->next){ 4181 if( compute_action(lemp,ap)>=0 ){ 4182 if( ap->sp->index<lemp->nterminal ){ 4183 stp->nTknAct++; 4184 }else if( ap->sp->index<lemp->nsymbol ){ 4185 stp->nNtAct++; 4186 }else{ 4187 stp->iDflt = compute_action(lemp, ap); 4188 } 4189 } 4190 } 4191 } 4192 qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), 4193 stateResortCompare); 4194 for(i=0; i<lemp->nstate; i++){ 4195 lemp->sorted[i]->statenum = i; 4196 } 4197 } 4198 4199 4200 /***************** From the file "set.c" ************************************/ 4201 /* 4202 ** Set manipulation routines for the LEMON parser generator. 4203 */ 4204 4205 static int size = 0; 4206 4207 /* Set the set size */ 4208 void SetSize(int n) 4209 { 4210 size = n+1; 4211 } 4212 4213 /* Allocate a new set */ 4214 char *SetNew(){ 4215 char *s; 4216 s = (char*)calloc( size, 1); 4217 if( s==0 ){ 4218 extern void memory_error(); 4219 memory_error(); 4220 } 4221 return s; 4222 } 4223 4224 /* Deallocate a set */ 4225 void SetFree(char *s) 4226 { 4227 free(s); 4228 } 4229 4230 /* Add a new element to the set. Return TRUE if the element was added 4231 ** and FALSE if it was already there. */ 4232 int SetAdd(char *s, int e) 4233 { 4234 int rv; 4235 assert( e>=0 && e<size ); 4236 rv = s[e]; 4237 s[e] = 1; 4238 return !rv; 4239 } 4240 4241 /* Add every element of s2 to s1. Return TRUE if s1 changes. */ 4242 int SetUnion(char *s1, char *s2) 4243 { 4244 int i, progress; 4245 progress = 0; 4246 for(i=0; i<size; i++){ 4247 if( s2[i]==0 ) continue; 4248 if( s1[i]==0 ){ 4249 progress = 1; 4250 s1[i] = 1; 4251 } 4252 } 4253 return progress; 4254 } 4255 /********************** From the file "table.c" ****************************/ 4256 /* 4257 ** All code in this file has been automatically generated 4258 ** from a specification in the file 4259 ** "table.q" 4260 ** by the associative array code building program "aagen". 4261 ** Do not edit this file! Instead, edit the specification 4262 ** file, then rerun aagen. 4263 */ 4264 /* 4265 ** Code for processing tables in the LEMON parser generator. 4266 */ 4267 4268 PRIVATE int strhash(const char *x) 4269 { 4270 int h = 0; 4271 while( *x) h = h*13 + *(x++); 4272 return h; 4273 } 4274 4275 /* Works like strdup, sort of. Save a string in malloced memory, but 4276 ** keep strings in a table so that the same string is not in more 4277 ** than one place. 4278 */ 4279 const char *Strsafe(const char *y) 4280 { 4281 const char *z; 4282 char *cpy; 4283 4284 if( y==0 ) return 0; 4285 z = Strsafe_find(y); 4286 if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ 4287 strcpy(cpy,y); 4288 z = cpy; 4289 Strsafe_insert(z); 4290 } 4291 MemoryCheck(z); 4292 return z; 4293 } 4294 4295 /* There is one instance of the following structure for each 4296 ** associative array of type "x1". 4297 */ 4298 struct s_x1 { 4299 int size; /* The number of available slots. */ 4300 /* Must be a power of 2 greater than or */ 4301 /* equal to 1 */ 4302 int count; /* Number of currently slots filled */ 4303 struct s_x1node *tbl; /* The data stored here */ 4304 struct s_x1node **ht; /* Hash table for lookups */ 4305 }; 4306 4307 /* There is one instance of this structure for every data element 4308 ** in an associative array of type "x1". 4309 */ 4310 typedef struct s_x1node { 4311 const char *data; /* The data */ 4312 struct s_x1node *next; /* Next entry with the same hash */ 4313 struct s_x1node **from; /* Previous link */ 4314 } x1node; 4315 4316 /* There is only one instance of the array, which is the following */ 4317 static struct s_x1 *x1a; 4318 4319 /* Allocate a new associative array */ 4320 void Strsafe_init(){ 4321 if( x1a ) return; 4322 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); 4323 if( x1a ){ 4324 x1a->size = 1024; 4325 x1a->count = 0; 4326 x1a->tbl = (x1node*)malloc( 4327 (sizeof(x1node) + sizeof(x1node*))*1024 ); 4328 if( x1a->tbl==0 ){ 4329 free(x1a); 4330 x1a = 0; 4331 }else{ 4332 int i; 4333 x1a->ht = (x1node**)&(x1a->tbl[1024]); 4334 for(i=0; i<1024; i++) x1a->ht[i] = 0; 4335 } 4336 } 4337 } 4338 /* Insert a new record into the array. Return TRUE if successful. 4339 ** Prior data with the same key is NOT overwritten */ 4340 int Strsafe_insert(const char *data) 4341 { 4342 x1node *np; 4343 int h; 4344 int ph; 4345 4346 if( x1a==0 ) return 0; 4347 ph = strhash(data); 4348 h = ph & (x1a->size-1); 4349 np = x1a->ht[h]; 4350 while( np ){ 4351 if( strcmp(np->data,data)==0 ){ 4352 /* An existing entry with the same key is found. */ 4353 /* Fail because overwrite is not allows. */ 4354 return 0; 4355 } 4356 np = np->next; 4357 } 4358 if( x1a->count>=x1a->size ){ 4359 /* Need to make the hash table bigger */ 4360 int i,size; 4361 struct s_x1 array; 4362 array.size = size = x1a->size*2; 4363 array.count = x1a->count; 4364 array.tbl = (x1node*)malloc( 4365 (sizeof(x1node) + sizeof(x1node*))*size ); 4366 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4367 array.ht = (x1node**)&(array.tbl[size]); 4368 for(i=0; i<size; i++) array.ht[i] = 0; 4369 for(i=0; i<x1a->count; i++){ 4370 x1node *oldnp, *newnp; 4371 oldnp = &(x1a->tbl[i]); 4372 h = strhash(oldnp->data) & (size-1); 4373 newnp = &(array.tbl[i]); 4374 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4375 newnp->next = array.ht[h]; 4376 newnp->data = oldnp->data; 4377 newnp->from = &(array.ht[h]); 4378 array.ht[h] = newnp; 4379 } 4380 free(x1a->tbl); 4381 *x1a = array; 4382 } 4383 /* Insert the new data */ 4384 h = ph & (x1a->size-1); 4385 np = &(x1a->tbl[x1a->count++]); 4386 np->data = data; 4387 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); 4388 np->next = x1a->ht[h]; 4389 x1a->ht[h] = np; 4390 np->from = &(x1a->ht[h]); 4391 return 1; 4392 } 4393 4394 /* Return a pointer to data assigned to the given key. Return NULL 4395 ** if no such key. */ 4396 const char *Strsafe_find(const char *key) 4397 { 4398 int h; 4399 x1node *np; 4400 4401 if( x1a==0 ) return 0; 4402 h = strhash(key) & (x1a->size-1); 4403 np = x1a->ht[h]; 4404 while( np ){ 4405 if( strcmp(np->data,key)==0 ) break; 4406 np = np->next; 4407 } 4408 return np ? np->data : 0; 4409 } 4410 4411 /* Return a pointer to the (terminal or nonterminal) symbol "x". 4412 ** Create a new symbol if this is the first time "x" has been seen. 4413 */ 4414 struct symbol *Symbol_new(const char *x) 4415 { 4416 struct symbol *sp; 4417 4418 sp = Symbol_find(x); 4419 if( sp==0 ){ 4420 sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); 4421 MemoryCheck(sp); 4422 sp->name = Strsafe(x); 4423 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL; 4424 sp->rule = 0; 4425 sp->fallback = 0; 4426 sp->prec = -1; 4427 sp->assoc = UNK; 4428 sp->firstset = 0; 4429 sp->lambda = LEMON_FALSE; 4430 sp->destructor = 0; 4431 sp->destLineno = 0; 4432 sp->datatype = 0; 4433 sp->useCnt = 0; 4434 Symbol_insert(sp,sp->name); 4435 } 4436 sp->useCnt++; 4437 return sp; 4438 } 4439 4440 /* Compare two symbols for working purposes 4441 ** 4442 ** Symbols that begin with upper case letters (terminals or tokens) 4443 ** must sort before symbols that begin with lower case letters 4444 ** (non-terminals). Other than that, the order does not matter. 4445 ** 4446 ** We find experimentally that leaving the symbols in their original 4447 ** order (the order they appeared in the grammar file) gives the 4448 ** smallest parser tables in SQLite. 4449 */ 4450 int Symbolcmpp(const void *_a, const void *_b) 4451 { 4452 const struct symbol **a = (const struct symbol **) _a; 4453 const struct symbol **b = (const struct symbol **) _b; 4454 int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); 4455 int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); 4456 assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 ); 4457 return i1-i2; 4458 } 4459 4460 /* There is one instance of the following structure for each 4461 ** associative array of type "x2". 4462 */ 4463 struct s_x2 { 4464 int size; /* The number of available slots. */ 4465 /* Must be a power of 2 greater than or */ 4466 /* equal to 1 */ 4467 int count; /* Number of currently slots filled */ 4468 struct s_x2node *tbl; /* The data stored here */ 4469 struct s_x2node **ht; /* Hash table for lookups */ 4470 }; 4471 4472 /* There is one instance of this structure for every data element 4473 ** in an associative array of type "x2". 4474 */ 4475 typedef struct s_x2node { 4476 struct symbol *data; /* The data */ 4477 const char *key; /* The key */ 4478 struct s_x2node *next; /* Next entry with the same hash */ 4479 struct s_x2node **from; /* Previous link */ 4480 } x2node; 4481 4482 /* There is only one instance of the array, which is the following */ 4483 static struct s_x2 *x2a; 4484 4485 /* Allocate a new associative array */ 4486 void Symbol_init(){ 4487 if( x2a ) return; 4488 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); 4489 if( x2a ){ 4490 x2a->size = 128; 4491 x2a->count = 0; 4492 x2a->tbl = (x2node*)malloc( 4493 (sizeof(x2node) + sizeof(x2node*))*128 ); 4494 if( x2a->tbl==0 ){ 4495 free(x2a); 4496 x2a = 0; 4497 }else{ 4498 int i; 4499 x2a->ht = (x2node**)&(x2a->tbl[128]); 4500 for(i=0; i<128; i++) x2a->ht[i] = 0; 4501 } 4502 } 4503 } 4504 /* Insert a new record into the array. Return TRUE if successful. 4505 ** Prior data with the same key is NOT overwritten */ 4506 int Symbol_insert(struct symbol *data, const char *key) 4507 { 4508 x2node *np; 4509 int h; 4510 int ph; 4511 4512 if( x2a==0 ) return 0; 4513 ph = strhash(key); 4514 h = ph & (x2a->size-1); 4515 np = x2a->ht[h]; 4516 while( np ){ 4517 if( strcmp(np->key,key)==0 ){ 4518 /* An existing entry with the same key is found. */ 4519 /* Fail because overwrite is not allows. */ 4520 return 0; 4521 } 4522 np = np->next; 4523 } 4524 if( x2a->count>=x2a->size ){ 4525 /* Need to make the hash table bigger */ 4526 int i,size; 4527 struct s_x2 array; 4528 array.size = size = x2a->size*2; 4529 array.count = x2a->count; 4530 array.tbl = (x2node*)malloc( 4531 (sizeof(x2node) + sizeof(x2node*))*size ); 4532 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4533 array.ht = (x2node**)&(array.tbl[size]); 4534 for(i=0; i<size; i++) array.ht[i] = 0; 4535 for(i=0; i<x2a->count; i++){ 4536 x2node *oldnp, *newnp; 4537 oldnp = &(x2a->tbl[i]); 4538 h = strhash(oldnp->key) & (size-1); 4539 newnp = &(array.tbl[i]); 4540 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4541 newnp->next = array.ht[h]; 4542 newnp->key = oldnp->key; 4543 newnp->data = oldnp->data; 4544 newnp->from = &(array.ht[h]); 4545 array.ht[h] = newnp; 4546 } 4547 free(x2a->tbl); 4548 *x2a = array; 4549 } 4550 /* Insert the new data */ 4551 h = ph & (x2a->size-1); 4552 np = &(x2a->tbl[x2a->count++]); 4553 np->key = key; 4554 np->data = data; 4555 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); 4556 np->next = x2a->ht[h]; 4557 x2a->ht[h] = np; 4558 np->from = &(x2a->ht[h]); 4559 return 1; 4560 } 4561 4562 /* Return a pointer to data assigned to the given key. Return NULL 4563 ** if no such key. */ 4564 struct symbol *Symbol_find(const char *key) 4565 { 4566 int h; 4567 x2node *np; 4568 4569 if( x2a==0 ) return 0; 4570 h = strhash(key) & (x2a->size-1); 4571 np = x2a->ht[h]; 4572 while( np ){ 4573 if( strcmp(np->key,key)==0 ) break; 4574 np = np->next; 4575 } 4576 return np ? np->data : 0; 4577 } 4578 4579 /* Return the n-th data. Return NULL if n is out of range. */ 4580 struct symbol *Symbol_Nth(int n) 4581 { 4582 struct symbol *data; 4583 if( x2a && n>0 && n<=x2a->count ){ 4584 data = x2a->tbl[n-1].data; 4585 }else{ 4586 data = 0; 4587 } 4588 return data; 4589 } 4590 4591 /* Return the size of the array */ 4592 int Symbol_count() 4593 { 4594 return x2a ? x2a->count : 0; 4595 } 4596 4597 /* Return an array of pointers to all data in the table. 4598 ** The array is obtained from malloc. Return NULL if memory allocation 4599 ** problems, or if the array is empty. */ 4600 struct symbol **Symbol_arrayof() 4601 { 4602 struct symbol **array; 4603 int i,size; 4604 if( x2a==0 ) return 0; 4605 size = x2a->count; 4606 array = (struct symbol **)calloc(size, sizeof(struct symbol *)); 4607 if( array ){ 4608 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data; 4609 } 4610 return array; 4611 } 4612 4613 /* Compare two configurations */ 4614 int Configcmp(const char *_a,const char *_b) 4615 { 4616 const struct config *a = (struct config *) _a; 4617 const struct config *b = (struct config *) _b; 4618 int x; 4619 x = a->rp->index - b->rp->index; 4620 if( x==0 ) x = a->dot - b->dot; 4621 return x; 4622 } 4623 4624 /* Compare two states */ 4625 PRIVATE int statecmp(struct config *a, struct config *b) 4626 { 4627 int rc; 4628 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ 4629 rc = a->rp->index - b->rp->index; 4630 if( rc==0 ) rc = a->dot - b->dot; 4631 } 4632 if( rc==0 ){ 4633 if( a ) rc = 1; 4634 if( b ) rc = -1; 4635 } 4636 return rc; 4637 } 4638 4639 /* Hash a state */ 4640 PRIVATE int statehash(struct config *a) 4641 { 4642 int h=0; 4643 while( a ){ 4644 h = h*571 + a->rp->index*37 + a->dot; 4645 a = a->bp; 4646 } 4647 return h; 4648 } 4649 4650 /* Allocate a new state structure */ 4651 struct state *State_new() 4652 { 4653 struct state *newstate; 4654 newstate = (struct state *)calloc(1, sizeof(struct state) ); 4655 MemoryCheck(newstate); 4656 return newstate; 4657 } 4658 4659 /* There is one instance of the following structure for each 4660 ** associative array of type "x3". 4661 */ 4662 struct s_x3 { 4663 int size; /* The number of available slots. */ 4664 /* Must be a power of 2 greater than or */ 4665 /* equal to 1 */ 4666 int count; /* Number of currently slots filled */ 4667 struct s_x3node *tbl; /* The data stored here */ 4668 struct s_x3node **ht; /* Hash table for lookups */ 4669 }; 4670 4671 /* There is one instance of this structure for every data element 4672 ** in an associative array of type "x3". 4673 */ 4674 typedef struct s_x3node { 4675 struct state *data; /* The data */ 4676 struct config *key; /* The key */ 4677 struct s_x3node *next; /* Next entry with the same hash */ 4678 struct s_x3node **from; /* Previous link */ 4679 } x3node; 4680 4681 /* There is only one instance of the array, which is the following */ 4682 static struct s_x3 *x3a; 4683 4684 /* Allocate a new associative array */ 4685 void State_init(){ 4686 if( x3a ) return; 4687 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); 4688 if( x3a ){ 4689 x3a->size = 128; 4690 x3a->count = 0; 4691 x3a->tbl = (x3node*)malloc( 4692 (sizeof(x3node) + sizeof(x3node*))*128 ); 4693 if( x3a->tbl==0 ){ 4694 free(x3a); 4695 x3a = 0; 4696 }else{ 4697 int i; 4698 x3a->ht = (x3node**)&(x3a->tbl[128]); 4699 for(i=0; i<128; i++) x3a->ht[i] = 0; 4700 } 4701 } 4702 } 4703 /* Insert a new record into the array. Return TRUE if successful. 4704 ** Prior data with the same key is NOT overwritten */ 4705 int State_insert(struct state *data, struct config *key) 4706 { 4707 x3node *np; 4708 int h; 4709 int ph; 4710 4711 if( x3a==0 ) return 0; 4712 ph = statehash(key); 4713 h = ph & (x3a->size-1); 4714 np = x3a->ht[h]; 4715 while( np ){ 4716 if( statecmp(np->key,key)==0 ){ 4717 /* An existing entry with the same key is found. */ 4718 /* Fail because overwrite is not allows. */ 4719 return 0; 4720 } 4721 np = np->next; 4722 } 4723 if( x3a->count>=x3a->size ){ 4724 /* Need to make the hash table bigger */ 4725 int i,size; 4726 struct s_x3 array; 4727 array.size = size = x3a->size*2; 4728 array.count = x3a->count; 4729 array.tbl = (x3node*)malloc( 4730 (sizeof(x3node) + sizeof(x3node*))*size ); 4731 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4732 array.ht = (x3node**)&(array.tbl[size]); 4733 for(i=0; i<size; i++) array.ht[i] = 0; 4734 for(i=0; i<x3a->count; i++){ 4735 x3node *oldnp, *newnp; 4736 oldnp = &(x3a->tbl[i]); 4737 h = statehash(oldnp->key) & (size-1); 4738 newnp = &(array.tbl[i]); 4739 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4740 newnp->next = array.ht[h]; 4741 newnp->key = oldnp->key; 4742 newnp->data = oldnp->data; 4743 newnp->from = &(array.ht[h]); 4744 array.ht[h] = newnp; 4745 } 4746 free(x3a->tbl); 4747 *x3a = array; 4748 } 4749 /* Insert the new data */ 4750 h = ph & (x3a->size-1); 4751 np = &(x3a->tbl[x3a->count++]); 4752 np->key = key; 4753 np->data = data; 4754 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); 4755 np->next = x3a->ht[h]; 4756 x3a->ht[h] = np; 4757 np->from = &(x3a->ht[h]); 4758 return 1; 4759 } 4760 4761 /* Return a pointer to data assigned to the given key. Return NULL 4762 ** if no such key. */ 4763 struct state *State_find(struct config *key) 4764 { 4765 int h; 4766 x3node *np; 4767 4768 if( x3a==0 ) return 0; 4769 h = statehash(key) & (x3a->size-1); 4770 np = x3a->ht[h]; 4771 while( np ){ 4772 if( statecmp(np->key,key)==0 ) break; 4773 np = np->next; 4774 } 4775 return np ? np->data : 0; 4776 } 4777 4778 /* Return an array of pointers to all data in the table. 4779 ** The array is obtained from malloc. Return NULL if memory allocation 4780 ** problems, or if the array is empty. */ 4781 struct state **State_arrayof() 4782 { 4783 struct state **array; 4784 int i,size; 4785 if( x3a==0 ) return 0; 4786 size = x3a->count; 4787 array = (struct state **)malloc( sizeof(struct state *)*size ); 4788 if( array ){ 4789 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data; 4790 } 4791 return array; 4792 } 4793 4794 /* Hash a configuration */ 4795 PRIVATE int confighash(struct config *a) 4796 { 4797 int h=0; 4798 h = h*571 + a->rp->index*37 + a->dot; 4799 return h; 4800 } 4801 4802 /* There is one instance of the following structure for each 4803 ** associative array of type "x4". 4804 */ 4805 struct s_x4 { 4806 int size; /* The number of available slots. */ 4807 /* Must be a power of 2 greater than or */ 4808 /* equal to 1 */ 4809 int count; /* Number of currently slots filled */ 4810 struct s_x4node *tbl; /* The data stored here */ 4811 struct s_x4node **ht; /* Hash table for lookups */ 4812 }; 4813 4814 /* There is one instance of this structure for every data element 4815 ** in an associative array of type "x4". 4816 */ 4817 typedef struct s_x4node { 4818 struct config *data; /* The data */ 4819 struct s_x4node *next; /* Next entry with the same hash */ 4820 struct s_x4node **from; /* Previous link */ 4821 } x4node; 4822 4823 /* There is only one instance of the array, which is the following */ 4824 static struct s_x4 *x4a; 4825 4826 /* Allocate a new associative array */ 4827 void Configtable_init(){ 4828 if( x4a ) return; 4829 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); 4830 if( x4a ){ 4831 x4a->size = 64; 4832 x4a->count = 0; 4833 x4a->tbl = (x4node*)malloc( 4834 (sizeof(x4node) + sizeof(x4node*))*64 ); 4835 if( x4a->tbl==0 ){ 4836 free(x4a); 4837 x4a = 0; 4838 }else{ 4839 int i; 4840 x4a->ht = (x4node**)&(x4a->tbl[64]); 4841 for(i=0; i<64; i++) x4a->ht[i] = 0; 4842 } 4843 } 4844 } 4845 /* Insert a new record into the array. Return TRUE if successful. 4846 ** Prior data with the same key is NOT overwritten */ 4847 int Configtable_insert(struct config *data) 4848 { 4849 x4node *np; 4850 int h; 4851 int ph; 4852 4853 if( x4a==0 ) return 0; 4854 ph = confighash(data); 4855 h = ph & (x4a->size-1); 4856 np = x4a->ht[h]; 4857 while( np ){ 4858 if( Configcmp((const char *) np->data,(const char *) data)==0 ){ 4859 /* An existing entry with the same key is found. */ 4860 /* Fail because overwrite is not allows. */ 4861 return 0; 4862 } 4863 np = np->next; 4864 } 4865 if( x4a->count>=x4a->size ){ 4866 /* Need to make the hash table bigger */ 4867 int i,size; 4868 struct s_x4 array; 4869 array.size = size = x4a->size*2; 4870 array.count = x4a->count; 4871 array.tbl = (x4node*)malloc( 4872 (sizeof(x4node) + sizeof(x4node*))*size ); 4873 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ 4874 array.ht = (x4node**)&(array.tbl[size]); 4875 for(i=0; i<size; i++) array.ht[i] = 0; 4876 for(i=0; i<x4a->count; i++){ 4877 x4node *oldnp, *newnp; 4878 oldnp = &(x4a->tbl[i]); 4879 h = confighash(oldnp->data) & (size-1); 4880 newnp = &(array.tbl[i]); 4881 if( array.ht[h] ) array.ht[h]->from = &(newnp->next); 4882 newnp->next = array.ht[h]; 4883 newnp->data = oldnp->data; 4884 newnp->from = &(array.ht[h]); 4885 array.ht[h] = newnp; 4886 } 4887 free(x4a->tbl); 4888 *x4a = array; 4889 } 4890 /* Insert the new data */ 4891 h = ph & (x4a->size-1); 4892 np = &(x4a->tbl[x4a->count++]); 4893 np->data = data; 4894 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); 4895 np->next = x4a->ht[h]; 4896 x4a->ht[h] = np; 4897 np->from = &(x4a->ht[h]); 4898 return 1; 4899 } 4900 4901 /* Return a pointer to data assigned to the given key. Return NULL 4902 ** if no such key. */ 4903 struct config *Configtable_find(struct config *key) 4904 { 4905 int h; 4906 x4node *np; 4907 4908 if( x4a==0 ) return 0; 4909 h = confighash(key) & (x4a->size-1); 4910 np = x4a->ht[h]; 4911 while( np ){ 4912 if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; 4913 np = np->next; 4914 } 4915 return np ? np->data : 0; 4916 } 4917 4918 /* Remove all data from the table. Pass each data to the function "f" 4919 ** as it is removed. ("f" may be null to avoid this step.) */ 4920 void Configtable_clear(int(*f)(struct config *)) 4921 { 4922 int i; 4923 if( x4a==0 || x4a->count==0 ) return; 4924 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data); 4925 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0; 4926 x4a->count = 0; 4927 return; 4928 } 4929