1 /* 2 * PCCTSAST.C 3 * 4 * SOFTWARE RIGHTS 5 * 6 * We reserve no LEGAL rights to SORCERER -- SORCERER is in the public 7 * domain. An individual or company may do whatever they wish with 8 * source code distributed with SORCERER or the code generated by 9 * SORCERER, including the incorporation of SORCERER, or its output, into 10 * commerical software. 11 * 12 * We encourage users to develop software with SORCERER. However, we do 13 * ask that credit is given to us for developing SORCERER. By "credit", 14 * we mean that if you incorporate our source code into one of your 15 * programs (commercial product, research project, or otherwise) that you 16 * acknowledge this fact somewhere in the documentation, research report, 17 * etc... If you like SORCERER and have developed a nice tool with the 18 * output, please mention that you developed it using SORCERER. In 19 * addition, we ask that this header remain intact in our source code. 20 * As long as these guidelines are kept, we expect to continue enhancing 21 * this system and expect to make other tools available as they are 22 * completed. 23 * 24 * SORCERER 1.00B14 and ANTLR 1.33 25 * Terence Parr 26 * Parr Research Corporation 27 * AHPCRC, University of Minnesota 28 * 1992-1998 29 */ 30 31 #define ANTLR_SUPPORT_CODE 32 33 #include "pcctscfg.h" 34 35 #include "PCCTSAST.h" 36 #include "pccts_stdarg.h" 37 38 PCCTS_NAMESPACE_STD 39 40 #include <ctype.h> 41 42 //#include "SList.h" 43 44 /* String Scanning/Parsing Stuff */ 45 46 const char *PCCTS_AST::scan_token_tbl[] = { /* MR20 const */ 47 "invalid", /* 0 */ 48 "LPAREN", /* 1 */ 49 "RPAREN", /* 2 */ 50 "PERCENT", /* 3 */ 51 "INT", /* 4 */ 52 "COLON", /* 5 */ 53 "POUND", /* 6 */ 54 "PERIOD", /* 7 */ 55 }; 56 57 void PCCTS_AST:: 58 addChild(PCCTS_AST *t) 59 { 60 if ( t==NULL ) return; 61 PCCTS_AST *s = down(); 62 if ( s!=NULL ) 63 { 64 while ( s->right()!=NULL ) s = s->right(); 65 s->setRight(t); 66 } 67 else 68 this->setDown(t); 69 } 70 71 void PCCTS_AST:: 72 lisp(FILE *f) 73 { 74 if ( down() != NULL ) fprintf(f," ("); 75 lisp_action(f); 76 if ( down()!=NULL ) down()->lisp(f); 77 if ( down() != NULL ) fprintf(f," )"); 78 if ( right()!=NULL ) right()->lisp(f); 79 } 80 81 /* build a tree (root child1 child2 ... NULL) 82 * If root is NULL, simply make the children siblings and return ptr 83 * to 1st sibling (child1). If root is not single node, return NULL. 84 * 85 * Siblings that are actually sibling lists themselves are handled 86 * correctly. For example #( NULL, #( NULL, A, B, C), D) results 87 * in the tree ( NULL A B C D ). 88 * 89 * Requires at least two parameters with the last one being NULL. If 90 * both are NULL, return NULL. 91 * 92 * The down() and right() down/right pointers are used to make the tree. 93 */ 94 PCCTS_AST *PCCTS_AST:: 95 make(PCCTS_AST *rt, ...) 96 { 97 va_list ap; 98 register PCCTS_AST *child, *sibling=NULL, *tail, *w; 99 PCCTS_AST *root; 100 101 va_start(ap, rt); 102 root = rt; 103 104 if ( root != NULL ) 105 if ( root->down() != NULL ) return NULL; 106 child = va_arg(ap, PCCTS_AST *); 107 while ( child != NULL ) 108 { 109 /* find end of child */ 110 for (w=child; w->right()!=NULL; w=w->right()) {;} 111 if ( sibling == NULL ) {sibling = child; tail = w;} 112 else {tail->setRight(child); tail = w;} 113 child = va_arg(ap, PCCTS_AST *); 114 } 115 if ( root==NULL ) root = sibling; 116 else root->setDown(sibling); 117 va_end(ap); 118 return root; 119 } 120 121 /* The following push and pop routines are only used by ast_find_all() */ 122 123 void PCCTS_AST:: 124 _push(PCCTS_AST **st, int *sp, PCCTS_AST *e) 125 { 126 (*sp)--; 127 require((*sp)>=0, "stack overflow"); 128 st[(*sp)] = e; 129 } 130 131 PCCTS_AST *PCCTS_AST:: 132 _pop(PCCTS_AST **st, int *sp) 133 { 134 PCCTS_AST *e = st[*sp]; 135 (*sp)++; 136 require((*sp)<=MaxTreeStackDepth, "stack underflow"); 137 return e; 138 } 139 140 /* Find all occurrences of u in t. 141 * 'cursor' must be initialized to 't'. It eventually 142 * returns NULL when no more occurrences of 'u' are found. 143 */ 144 PCCTS_AST *PCCTS_AST:: 145 ast_find_all(PCCTS_AST *u, PCCTS_AST **cursor) 146 { 147 PCCTS_AST *sib; 148 static PCCTS_AST *template_stack[MaxTreeStackDepth]; 149 static int tsp = MaxTreeStackDepth; 150 static int nesting = 0; 151 152 if ( *cursor == NULL ) return NULL; 153 if ( *cursor!=this ) sib = *cursor; 154 else { 155 /* else, first time--start at top of template 't' */ 156 tsp = MaxTreeStackDepth; 157 sib = this; 158 /* bottom of stack is always a NULL--"cookie" indicates "done" */ 159 _push(template_stack, &tsp, NULL); 160 } 161 162 keep_looking: 163 if ( sib==NULL ) /* hit end of sibling list */ 164 { 165 sib = _pop(template_stack, &tsp); 166 if ( sib == NULL ) { *cursor = NULL; return NULL; } 167 } 168 169 if ( sib->type() != u->type() ) 170 { 171 /* look for another match */ 172 if ( sib->down()!=NULL ) 173 { 174 if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right()); 175 sib=sib->down(); 176 goto keep_looking; 177 } 178 /* nothing below to try, try next sibling */ 179 sib=sib->right(); 180 goto keep_looking; 181 } 182 183 /* found a matching root node, try to match what's below */ 184 if ( match_partial(sib, u) ) 185 { 186 /* record sibling cursor so we can pick up next from there */ 187 if ( sib->down()!=NULL ) 188 { 189 if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right()); 190 *cursor = sib->down(); 191 } 192 else if ( sib->right()!=NULL ) *cursor = sib->right(); 193 else *cursor = _pop(template_stack, &tsp); 194 return sib; 195 } 196 197 /* no match, keep searching */ 198 if ( sib->down()!=NULL ) 199 { 200 if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right()); 201 sib=sib->down(); 202 } 203 else sib = sib->right(); /* else, try to right if zip below */ 204 goto keep_looking; 205 } 206 207 /* are two trees exactly alike? */ 208 int PCCTS_AST:: 209 match(PCCTS_AST *u) 210 { 211 PCCTS_AST *t = this; 212 PCCTS_AST *sib; 213 214 if ( u==NULL ) return 0; 215 216 for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right()) 217 { 218 if ( sib->type() != u->type() ) return 0; 219 if ( sib->down()!=NULL ) 220 if ( !sib->down()->match(u->down()) ) return 0; 221 } 222 return 1; 223 } 224 225 /* Is 'u' a subtree of 't' beginning at the root? */ 226 int PCCTS_AST:: 227 match_partial(PCCTS_AST *t, PCCTS_AST *u) 228 { 229 PCCTS_AST *sib; 230 231 if ( u==NULL ) return 1; 232 if ( t==NULL ) if ( u!=NULL ) return 0; else return 1; 233 234 for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right()) 235 { 236 if ( sib->type() != u->type() ) return 0; 237 if ( sib->down()!=NULL ) 238 if ( !match_partial(sib->down(), u->down()) ) return 0; 239 } 240 return 1; 241 } 242 243 /* Walk the template tree 't' (matching against 'this'), filling in the 244 * 'labels' array, and setting 'n' according to how many labels were matched. 245 */ 246 int PCCTS_AST:: 247 scanmatch(ScanAST *t, PCCTS_AST **labels[], int *n) 248 { 249 ScanAST *sib; 250 PCCTS_AST *u = this; 251 252 if ( u==NULL ) return 0; 253 254 for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right()) 255 { 256 /* make sure tokens match; token of '0' means wildcard match */ 257 if ( sib->type() != u->type() && sib->type()!=0 ) return 0; 258 /* we have a matched token here; set label pointers if exists */ 259 if ( sib->label_num>0 ) 260 { 261 require(labels!=NULL, "label found in template, but no array of labels"); 262 (*n)++; 263 *(labels[sib->label_num-1]) = u; 264 } 265 /* match what's below if something there and current node is not wildcard */ 266 if ( sib->down()!=NULL && sib->type()!=0 ) 267 { 268 if ( sib->down()==NULL ) if ( u->down()!=NULL ) return 0; else return 1; 269 if ( !u->down()->scanmatch(sib->down(), labels, n) ) return 0; 270 } 271 } 272 return 1; 273 } 274 275 void PCCTS_AST:: 276 insert_after(PCCTS_AST *b) 277 { 278 PCCTS_AST *end; 279 if ( b==NULL ) return; 280 /* find end of b's child list */ 281 for (end=b; end->right()!=NULL; end=end->right()) {;} 282 end->setRight(this->right()); 283 this->setRight(b); 284 } 285 286 void PCCTS_AST:: 287 append(PCCTS_AST *b) 288 { 289 PCCTS_AST *end; 290 require(b!=NULL, "append: NULL input tree"); 291 /* find end of child list */ 292 for (end=this; end->right()!=NULL; end=end->right()) {;} 293 end->setRight(b); 294 } 295 296 PCCTS_AST *PCCTS_AST:: 297 tail() 298 { 299 PCCTS_AST *end; 300 /* find end of child list */ 301 for (end=this; end->right()!=NULL; end=end->right()) {;} 302 return end; 303 } 304 305 PCCTS_AST *PCCTS_AST:: 306 bottom() 307 { 308 PCCTS_AST *end; 309 /* find end of child list */ 310 for (end=this; end->down()!=NULL; end=end->down()) {;} 311 return end; 312 } 313 314 PCCTS_AST *PCCTS_AST:: 315 cut_between(PCCTS_AST *a, PCCTS_AST *b) 316 { 317 PCCTS_AST *end, *ret; 318 if (a==NULL||b==NULL) return NULL; 319 /* find node pointing to b */ 320 for (end=a; end->right()!=NULL&&end->right()!=b; end=end->right()) 321 {;} 322 if (end->right()==NULL) return NULL; //ast_cut_between: a,b not connected 323 end->setRight(NULL); /* don't want it point to 'b' anymore */ 324 ret = a->right(); 325 a->setRight(b); 326 return ret; 327 } 328 329 #ifdef NOT_YET 330 SList *PCCTS_AST:: 331 to_slist() 332 { 333 SList *list = new SList; 334 PCCTS_AST *p; 335 336 for (p=this; p!=NULL; p=p->right()) 337 { 338 list->add(p); 339 } 340 return list; 341 } 342 #endif 343 344 void PCCTS_AST:: 345 tfree() 346 { 347 PCCTS_AST *t = this; 348 if ( t->down()!=NULL ) t->down()->tfree(); 349 if ( t->right()!=NULL ) t->right()->tfree(); 350 delete t; 351 } 352 353 int PCCTS_AST:: 354 nsiblings() 355 { 356 PCCTS_AST *t = this; 357 int n=0; 358 359 while ( t!=NULL ) 360 { 361 n++; 362 t = t->right(); 363 } 364 return n; 365 } 366 367 PCCTS_AST *PCCTS_AST:: 368 sibling_index(int i) 369 { 370 PCCTS_AST *t = this; 371 int j=1; 372 require(i>0, "sibling_index: i<=0"); 373 374 while ( t!=NULL ) 375 { 376 if ( j==i ) return t; 377 j++; 378 t = t->right(); 379 } 380 return NULL; 381 } 382 383 /* Assume this is a root node of a tree-- 384 * duplicate that node and what's below; ignore siblings of root node. 385 */ 386 387 // MR9 23-Sep-97 RJV 388 // MR9 389 // MR9 RJV: Original version only duplicated the node and down elements. 390 // MR9 Made copies of the pointers to sibling. 391 // MR9 Changed call "down()->deepCopy()" to "down()->deepCopyBushy()" 392 // MR9 393 394 PCCTS_AST *PCCTS_AST:: 395 deepCopy() 396 { 397 PCCTS_AST *u = this->shallowCopy(); 398 if ( down()!=NULL ) u->setDown(down()->deepCopyBushy()); 399 u->setRight(NULL); 400 return u; 401 } 402 403 /* Copy all nodes including siblings of root. */ 404 PCCTS_AST *PCCTS_AST:: 405 deepCopyBushy() 406 { 407 PCCTS_AST *u = this->shallowCopy(); 408 /* copy the rest of the tree */ 409 if ( down()!=NULL ) u->setDown(down()->deepCopyBushy()); 410 if ( right()!=NULL ) u->setRight(right()->deepCopyBushy()); 411 return u; 412 } 413 414 void PCCTS_AST:: 415 scanast_free(ScanAST *t) 416 { 417 if ( t == NULL ) return; 418 scanast_free( t->down() ); 419 scanast_free( t->right() ); 420 free( (char *) t ); // MR1 421 } 422 423 /* 424 * scan 425 * 426 * This function is like scanf(): it attempts to match a template 427 * against an input tree. A variable number of tree pointers 428 * may be set according to the '%i' labels in the template string. 429 * For example: 430 * 431 * t->ast_scan("#( 6 #(5 %1:4 %2:3) #(1 %3:3 %4:3) )", 432 * &w, &x, &y, &z); 433 * 434 * Naturally, you'd want this converted from 435 * 436 * t->ast_scan("#( RangeOp #(Minus %1:IConst %2:Var) #(Plus %3:Var %4Var) )", 437 * &w, &x, &y, &z); 438 * 439 * by SORCERER. 440 * 441 * This function call must be done withing a SORCERER file because SORCERER 442 * must convert the token references to the associated token number. 443 * 444 * This functions parses the template and creates trees which are then 445 * matched against the input tree. The labels are set as they are 446 * encountered; hence, partial matches may leave some pointers set 447 * and some NULL. This routines initializes all argument pointers to NULL 448 * at the beginning. 449 * 450 * This function returns the number of labels matched. 451 */ 452 int PCCTS_AST:: 453 ast_scan(char *templ, ...) 454 { 455 va_list ap; 456 ScanAST *tmpl; 457 int n, i, found=0; 458 PCCTS_AST ***label_ptrs=NULL; 459 460 va_start(ap, templ); 461 462 /* make a ScanAST tree out of the template */ 463 tmpl = stringparser_parse_scanast(templ, &n); 464 465 /* make an array out of the labels */ 466 if ( n>0 ) 467 { 468 label_ptrs = (PCCTS_AST ***) calloc(n, sizeof(PCCTS_AST **)); 469 require(label_ptrs!=NULL, "scan: out of memory"); 470 for (i=1; i<=n; i++) 471 { 472 label_ptrs[i-1] = va_arg(ap, PCCTS_AST **); 473 *(label_ptrs[i-1]) = NULL; 474 } 475 } 476 477 /* match the input tree against the template */ 478 scanmatch(tmpl, label_ptrs, &found); 479 480 scanast_free(tmpl); 481 free( (char *) label_ptrs); // MR1 482 483 return found; 484 } 485 486 ScanAST *PCCTS_AST:: 487 new_scanast(int tok) 488 { 489 ScanAST *p = (ScanAST *) calloc(1, sizeof(ScanAST)); 490 // 491 // 7-Apr-97 133MR1 492 // 493 if ( p == NULL ) { // MR1 494 fprintf(stderr, "out of mem\n"); // MR1 495 exit(PCCTS_EXIT_FAILURE); // MR1 496 }; // MR1 497 p->_token = tok; 498 return p; 499 } 500 501 ScanAST *PCCTS_AST:: 502 stringparser_parse_scanast(char *templ, int *num_labels) 503 { 504 StringLexer lex; 505 StringParser parser; 506 ScanAST *t; 507 508 stringlexer_init(&lex, templ); 509 stringparser_init(&parser, &lex); 510 t = stringparser_parse_tree(&parser); 511 *num_labels = parser.num_labels; 512 return t; 513 } 514 515 void PCCTS_AST:: 516 stringparser_match(StringParser *parser, int token) 517 { 518 if ( parser->token != token ) panic("bad tree in scan()"); 519 } 520 521 /* 522 * Match a tree of the form: 523 * (root child1 child2 ... childn) 524 * or, 525 * node 526 * 527 * where the elements are integers or labeled integers. 528 */ 529 ScanAST *PCCTS_AST:: 530 stringparser_parse_tree(StringParser *parser) 531 { 532 ScanAST *t=NULL, *root, *child, *last; 533 534 if ( parser->token != __POUND ) 535 { 536 return stringparser_parse_element(parser); 537 } 538 stringparser_match(parser,__POUND); 539 parser->token = stringscan_gettok(parser->lexer); 540 stringparser_match(parser,__LPAREN); 541 parser->token = stringscan_gettok(parser->lexer); 542 root = stringparser_parse_element(parser); 543 while ( parser->token != __RPAREN ) 544 { 545 child = stringparser_parse_element(parser); 546 if ( t==NULL ) { t = child; last = t; } 547 else { last->_right = child; last = child; } 548 } 549 stringparser_match(parser,__RPAREN); 550 parser->token = stringscan_gettok(parser->lexer); 551 root->_down = t; 552 return root; 553 } 554 555 ScanAST *PCCTS_AST:: 556 stringparser_parse_element(StringParser *parser) 557 { 558 static char ebuf[100]; 559 int label = 0; 560 561 if ( parser->token == __POUND ) 562 { 563 return stringparser_parse_tree(parser); 564 } 565 if ( parser->token == __PERCENT ) 566 { 567 parser->token = stringscan_gettok(parser->lexer); 568 stringparser_match(parser,__INT); 569 label = atoi(parser->lexer->text); 570 parser->num_labels++; 571 if ( label==0 ) panic("%%0 is an invalid label"); 572 parser->token = stringscan_gettok(parser->lexer); 573 stringparser_match(parser,__COLON); 574 parser->token = stringscan_gettok(parser->lexer); 575 /* can label tokens and wildcards */ 576 if ( parser->token != __INT && parser->token != __PERIOD ) 577 panic("can only label tokens"); 578 } 579 if ( parser->token == __INT ) 580 { 581 ScanAST *p = new_scanast(atoi(parser->lexer->text)); 582 parser->token = stringscan_gettok(parser->lexer); 583 p->label_num = label; 584 return p; 585 } 586 if ( parser->token == __PERIOD ) 587 { 588 ScanAST *p = new_scanast(0); /* token of 0 is wildcard */ 589 parser->token = stringscan_gettok(parser->lexer); 590 p->label_num = label; 591 return p; 592 } 593 sprintf(ebuf, "mismatch token in scan(): %s", scan_token_str(parser->token)); 594 panic(ebuf); 595 return NULL; 596 } 597 598 void PCCTS_AST:: 599 stringparser_init(StringParser *parser, StringLexer *input) 600 { 601 parser->lexer = input; 602 parser->token = stringscan_gettok(parser->lexer); 603 parser->num_labels = 0; 604 } 605 606 void PCCTS_AST:: 607 stringlexer_init(StringLexer *scanner, char *input) 608 { 609 scanner->text[0]='\0'; 610 scanner->input = input; 611 scanner->p = input; 612 stringscan_advance(scanner); 613 } 614 615 void PCCTS_AST:: 616 stringscan_advance(StringLexer *scanner) 617 { 618 if ( *(scanner->p) == '\0' ) scanner->c = __StringScanEOF; 619 scanner->c = *(scanner->p)++; 620 } 621 622 int PCCTS_AST:: 623 stringscan_gettok(StringLexer *scanner) 624 { 625 char *index = &scanner->text[0]; 626 static char ebuf[100]; 627 628 while ( isspace(scanner->c) ) { stringscan_advance(scanner); } 629 if ( isdigit(scanner->c) ) 630 { 631 int tok = __INT; 632 while ( isdigit(scanner->c) ) { 633 *index++ = scanner->c; 634 stringscan_advance(scanner); 635 } 636 *index = '\0'; 637 return tok; 638 } 639 switch ( scanner->c ) 640 { 641 case '#' : stringscan_advance(scanner); return __POUND; 642 case '(' : stringscan_advance(scanner); return __LPAREN; 643 case ')' : stringscan_advance(scanner); return __RPAREN; 644 case '%' : stringscan_advance(scanner); return __PERCENT; 645 case ':' : stringscan_advance(scanner); return __COLON; 646 case '.' : stringscan_advance(scanner); return __PERIOD; 647 case '\0' : return __StringScanEOF; 648 case __StringScanEOF : return __StringScanEOF; 649 default : 650 sprintf(ebuf, "invalid char in scan: '%c'", scanner->c); 651 panic(ebuf); 652 } 653 return __StringScanEOF; // never reached 654 } 655 656 const char *PCCTS_AST:: /* MR20 const */ 657 scan_token_str(int t) 658 { 659 if ( VALID_SCAN_TOKEN(t) ) return scan_token_tbl[t]; 660 else if ( t==__StringScanEOF ) return "<end-of-string>"; 661 else return "<invalid-token>"; 662 } 663