/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }

/* Parser-wide state shared between the grammar actions below. */
Node    *beginloc = 0;
Node    *endloc = 0;
int     infunc  = 0;    /* = 1 if in arglist or body of func */
int     inloop  = 0;    /* = 1 if in while, for, do */
char    *curfname = 0;  /* current function name */
Node    *arglist = 0;   /* list of args for current function */
%}

/* Semantic value: parse-tree node, symbol-table cell, integer code, or string. */
%union {
        Node    *p;
        Cell    *cp;
        int     i;
        char    *s;
}

/* Token declarations.  Keep the order: FIRSTTOKEN must stay first. */
%token  <i>     FIRSTTOKEN      /* must be first */
%token  <p>     PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token  <i>     NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token  <i>     ARRAY
%token  <i>     MATCH NOTMATCH MATCHOP
%token  <i>     FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token  <i>     AND BOR APPEND EQ GE GT LE LT NE IN
%token  <i>     ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token  <i>     SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token  <i>     ADD MINUS MULT DIVIDE MOD
%token  <i>     ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token  <i>     PRINT PRINTF SPRINTF
%token  <p>     ELSE INTEST CONDEXPR
%token  <i>     POSTINCR PREINCR POSTDECR PREDECR
%token  <cp>    VAR IVAR VARNF CALL NUMBER STRING
%token  <s>     REGEXPR

/* Value types of the nonterminals. */
%type   <p>     pas pattern ppattern plist pplist patlist prarg term re
%type   <p>     pa_pat pa_stat pa_stats
%type   <s>     reg_expr
%type   <p>     simple_stmt opt_simple_stmt stmt stmtlist
%type   <p>     var varname funcname varlist
%type   <p>     for if else while
%type   <i>     do st
%type   <i>     pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type   <i>     subop print

/* Operator precedence table; yacc gives later lines higher precedence. */
%right  ASGNOP
%right  '?'
%right  ':'
%left   BOR
%left   AND
%left   GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left   ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left   GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left   PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left   REGEXPR VAR VARNF IVAR WHILE '('
%left   CAT
%left   '+' '-'
%left   '*' '/' '%'
%left   NOT UMINUS
%right  POWER
%right  DECR INCR
%left   INDIRECT
%token  LASTTOKEN       /* must be last */

%%

/* Top level: on a clean parse, store the whole program tree in winner. */
program:
          pas   { if (errorflag==0)
                        winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
        | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
        ;

/* The next few rules let newlines follow a token without ending the statement. */
and:
          AND | and NL
        ;

bor:
          BOR | bor NL
        ;

comma:
          ',' | comma NL
        ;

do:
          DO | do NL
        ;

else:
          ELSE | else NL
        ;

/*
 * for loops.  The mid-rule action bumps inloop before the body is parsed
 * so that the BREAK/CONTINUE checks in stmt see a nonzero value; the final
 * action undoes it.  The mid-rule action occupies a $n slot of its own.
 */
for:
          FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
                { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
        | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
                { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
        | FOR '(' varname IN varname rparen {inloop++;} stmt
                { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
        ;

funcname:
          VAR   { setfname($1); }
        | CALL  { setfname($1); }
        ;

if:
          IF '(' pattern rparen         { $$ = notnull($3); }
        ;

lbrace:
          '{' | lbrace NL
        ;

nl:
          NL | nl NL
        ;

opt_nl:
          /* empty */   { $$ = 0; }
        | nl
        ;

opt_pst:
          /* empty */   { $$ = 0; }
        | pst
        ;


opt_simple_stmt:
          /* empty */                   { $$ = 0; }
        | simple_stmt
        ;

pas:
          opt_pst                       { $$ = 0; }
        | opt_pst pa_stats opt_pst      { $$ = $2; }
        ;

pa_pat:
          pattern       { $$ = notnull($1); }
        ;

/*
 * One pattern-action item.  BEGIN/END bodies are accumulated in
 * beginloc/endloc and function definitions are handed to defn(); those
 * alternatives yield 0 rather than a node.
 */
pa_stat:
          pa_pat                        { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
        | pa_pat lbrace stmtlist '}'    { $$ = stat2(PASTAT, $1, $3); }
        | pa_pat ',' pa_pat             { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
        | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
        | lbrace stmtlist '}'           { $$ = stat2(PASTAT, NIL, $2); }
        | XBEGIN lbrace stmtlist '}'
                { beginloc = linkum(beginloc, $3); $$ = 0; }
        | XEND lbrace stmtlist '}'
                { endloc = linkum(endloc, $3); $$ = 0; }
        | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
                { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
        ;

pa_stats:
          pa_stat
        | pa_stats opt_pst pa_stat      { $$ = linkum($1, $3); }
        ;

patlist:
          pattern
        | patlist comma pattern         { $$ = linkum($1, $3); }
        ;

/*
 * Restricted expression form used for unparenthesized print arguments
 * (see pplist/prarg).  Unlike pattern it has no comparison operators and
 * no '|' GETLINE form -- presumably so that '>' and '|' after a print
 * argument list are parsed as redirections by simple_stmt.
 */
ppattern:
          var ASGNOP ppattern           { $$ = op2($2, $1, $3); }
        | ppattern '?' ppattern ':' ppattern %prec '?'
                { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
        | ppattern bor ppattern %prec BOR
                { $$ = op2(BOR, notnull($1), notnull($3)); }
        | ppattern and ppattern %prec AND
                { $$ = op2(AND, notnull($1), notnull($3)); }
        | ppattern MATCHOP reg_expr     { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
        | ppattern MATCHOP ppattern
                { if (constnode($3))
                        $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
                  else
                        $$ = op3($2, (Node *)1, $1, $3); }
        | ppattern IN varname           { $$ = op2(INTEST, $1, makearr($3)); }
        | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
        | ppattern term %prec CAT       { $$ = op2(CAT, $1, $2); }
        | re
        | term
        ;

/*
 * General expression.  For MATCHOP with a non-regex right operand, a
 * constant operand is compiled with makedfa() at parse time; otherwise the
 * node is built with (Node *)1 in the first slot and the expression itself.
 */
pattern:
          var ASGNOP pattern            { $$ = op2($2, $1, $3); }
        | pattern '?' pattern ':' pattern %prec '?'
                { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
        | pattern bor pattern %prec BOR
                { $$ = op2(BOR, notnull($1), notnull($3)); }
        | pattern and pattern %prec AND
                { $$ = op2(AND, notnull($1), notnull($3)); }
        | pattern EQ pattern            { $$ = op2($2, $1, $3); }
        | pattern GE pattern            { $$ = op2($2, $1, $3); }
        | pattern GT pattern            { $$ = op2($2, $1, $3); }
        | pattern LE pattern            { $$ = op2($2, $1, $3); }
        | pattern LT pattern            { $$ = op2($2, $1, $3); }
        | pattern NE pattern            { $$ = op2($2, $1, $3); }
        | pattern MATCHOP reg_expr      { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
        | pattern MATCHOP pattern
                { if (constnode($3))
                        $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
                  else
                        $$ = op3($2, (Node *)1, $1, $3); }
        | pattern IN varname            { $$ = op2(INTEST, $1, makearr($3)); }
        | '(' plist ')' IN varname      { $$ = op2(INTEST, $2, makearr($5)); }
        | pattern '|' GETLINE var       {
                        if (safe) SYNTAX("cmd | getline is unsafe");
                        else $$ = op3(GETLINE, $4, itonp($2), $1); }
        | pattern '|' GETLINE           {
                        if (safe) SYNTAX("cmd | getline is unsafe");
                        else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
        | pattern term %prec CAT        { $$ = op2(CAT, $1, $2); }
        | re
        | term
        ;

plist:
          pattern comma pattern         { $$ = linkum($1, $3); }
        | plist comma pattern           { $$ = linkum($1, $3); }
        ;

pplist:
          ppattern
        | pplist comma ppattern         { $$ = linkum($1, $3); }
        ;

/* Arguments of print/printf; an empty list stands for rectonode(). */
prarg:
          /* empty */                   { $$ = rectonode(); }
        | pplist
        | '(' plist ')'                 { $$ = $2; }
        ;

print:
          PRINT | PRINTF
        ;

pst:
          NL | ';' | pst NL | pst ';'
        ;

rbrace:
          '}' | rbrace NL
        ;

/* A bare /regex/ is a MATCH against rectonode(). */
re:
           reg_expr
                { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
        | NOT re        { $$ = op1(NOT, notnull($2)); }
        ;

/* startreg() is called right after the opening '/', before REGEXPR is read. */
reg_expr:
          '/' {startreg();} REGEXPR '/'         { $$ = $3; }
        ;

rparen:
          ')' | rparen NL
        ;

/* Output redirection forms are rejected with a syntax error when safe is set. */
simple_stmt:
          print prarg '|' term          {
                        if (safe) SYNTAX("print | is unsafe");
                        else $$ = stat3($1, $2, itonp($3), $4); }
        | print prarg APPEND term       {
                        if (safe) SYNTAX("print >> is unsafe");
                        else $$ = stat3($1, $2, itonp($3), $4); }
        | print prarg GT term           {
                        if (safe) SYNTAX("print > is unsafe");
                        else $$ = stat3($1, $2, itonp($3), $4); }
        | print prarg                   { $$ = stat3($1, $2, NIL, NIL); }
        | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
        | DELETE varname                { $$ = stat2(DELETE, makearr($2), 0); }
        | pattern                       { $$ = exptostat($1); }
        | error                         { yyclearin; SYNTAX("illegal statement"); }
        ;

/* Statement terminator. */
st:
          nl
        | ';' opt_nl
        ;

/*
 * Statements.  break/continue are checked against inloop, next/nextfile
 * against infunc; the node is still built after the error is reported.
 */
stmt:
          BREAK st              { if (!inloop) SYNTAX("break illegal outside of loops");
                                  $$ = stat1(BREAK, NIL); }
        | CONTINUE st           {  if (!inloop) SYNTAX("continue illegal outside of loops");
                                  $$ = stat1(CONTINUE, NIL); }
        | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
                { $$ = stat2(DO, $3, notnull($7)); }
        | EXIT pattern st       { $$ = stat1(EXIT, $2); }
        | EXIT st               { $$ = stat1(EXIT, NIL); }
        | for
        | if stmt else stmt     { $$ = stat3(IF, $1, $2, $4); }
        | if stmt               { $$ = stat3(IF, $1, $2, NIL); }
        | lbrace stmtlist rbrace { $$ = $2; }
        | NEXT st       { if (infunc)
                                SYNTAX("next is illegal inside a function");
                          $$ = stat1(NEXT, NIL); }
        | NEXTFILE st   { if (infunc)
                                SYNTAX("nextfile is illegal inside a function");
                          $$ = stat1(NEXTFILE, NIL); }
        | RETURN pattern st     { $$ = stat1(RETURN, $2); }
        | RETURN st             { $$ = stat1(RETURN, NIL); }
        | simple_stmt st
        | while {inloop++;} stmt        { --inloop; $$ = stat2(WHILE, $1, $3); }
        | ';' opt_nl            { $$ = 0; }
        ;

stmtlist:
          stmt
        | stmtlist stmt         { $$ = linkum($1, $2); }
        ;

subop:
          SUB | GSUB
        ;

/*
 * Primary and arithmetic expressions.  Note that "term '/' ASGNOP term"
 * builds a DIVEQ node, and that unary '+' yields its operand unchanged.
 */
term:
          term '/' ASGNOP term          { $$ = op2(DIVEQ, $1, $4); }
        | term '+' term                 { $$ = op2(ADD, $1, $3); }
        | term '-' term                 { $$ = op2(MINUS, $1, $3); }
        | term '*' term                 { $$ = op2(MULT, $1, $3); }
        | term '/' term                 { $$ = op2(DIVIDE, $1, $3); }
        | term '%' term                 { $$ = op2(MOD, $1, $3); }
        | term POWER term               { $$ = op2(POWER, $1, $3); }
        | '-' term %prec UMINUS         { $$ = op1(UMINUS, $2); }
        | '+' term %prec UMINUS         { $$ = $2; }
        | NOT term %prec UMINUS         { $$ = op1(NOT, notnull($2)); }
        | BLTIN '(' ')'                 { $$ = op2(BLTIN, itonp($1), rectonode()); }
        | BLTIN '(' patlist ')'         { $$ = op2(BLTIN, itonp($1), $3); }
        | BLTIN                         { $$ = op2(BLTIN, itonp($1), rectonode()); }
        | CALL '(' ')'                  { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
        | CALL '(' patlist ')'          { $$ = op2(CALL, celltonode($1,CVAR), $3); }
        | CLOSE term                    { $$ = op1(CLOSE, $2); }
        | DECR var                      { $$ = op1(PREDECR, $2); }
        | INCR var                      { $$ = op1(PREINCR, $2); }
        | var DECR                      { $$ = op1(POSTDECR, $1); }
        | var INCR                      { $$ = op1(POSTINCR, $1); }
        | GETLINE var LT term           { $$ = op3(GETLINE, $2, itonp($3), $4); }
        | GETLINE LT term               { $$ = op3(GETLINE, NIL, itonp($2), $3); }
        | GETLINE var                   { $$ = op3(GETLINE, $2, NIL, NIL); }
        | GETLINE                       { $$ = op3(GETLINE, NIL, NIL, NIL); }
        | INDEX '(' pattern comma pattern ')'
                { $$ = op2(INDEX, $3, $5); }
        | INDEX '(' pattern comma reg_expr ')'
                { SYNTAX("index() doesn't permit regular expressions");
                  $$ = op2(INDEX, $3, (Node*)$5); }
        | '(' pattern ')'               { $$ = $2; }
        | MATCHFCN '(' pattern comma reg_expr ')'
                { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
        | MATCHFCN '(' pattern comma pattern ')'
                { if (constnode($5))
                        $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
                  else
                        $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
        | NUMBER                        { $$ = celltonode($1, CCON); }
        | SPLIT '(' pattern comma varname comma pattern ')'     /* string */
                { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
        | SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
                { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
        | SPLIT '(' pattern comma varname ')'
                { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
        | SPRINTF '(' patlist ')'       { $$ = op1($1, $3); }
        | STRING                        { $$ = celltonode($1, CCON); }
        | subop '(' reg_expr comma pattern ')'
                { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
        | subop '(' pattern comma pattern ')'
                { if (constnode($3))
                        $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
                  else
                        $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
        | subop '(' reg_expr comma pattern comma var ')'
                { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
        | subop '(' pattern comma pattern comma var ')'
                { if (constnode($3))
                        $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
                  else
                        $$ = op4($1, (Node *)1, $3, $5, $7); }
        | SUBSTR '(' pattern comma pattern comma pattern ')'
                { $$ = op3(SUBSTR, $3, $5, $7); }
        | SUBSTR '(' pattern comma pattern ')'
                { $$ = op3(SUBSTR, $3, $5, NIL); }
        | var
        ;

var:
          varname
        | varname '[' patlist ']'       { $$ = op2(ARRAY, makearr($1), $3); }
        | IVAR                          { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
        | INDIRECT term                 { $$ = op1(INDIRECT, $2); }
        ;

/* Formal parameter list of a function; arglist tracks the list built so far. */
varlist:
          /* nothing */         { arglist = $$ = 0; }
        | VAR                   { arglist = $$ = celltonode($1,CVAR); }
        | varlist comma VAR     {
                        checkdup($1, $3);
                        arglist = $$ = linkum($1,celltonode($3,CVAR)); }
        ;

varname:
          VAR                   { $$ = celltonode($1, CVAR); }
        | ARG                   { $$ = op1(ARG, itonp($1)); }
        | VARNF                 { $$ = op1(VARNF, (Node *) $1); }
        ;


while:
          WHILE '(' pattern rparen      { $$ = notnull($3); }
        ;

%%

/*
 * Note the name of the function whose definition is starting.
 * Reports a syntax error if p is already an array or already a function,
 * then records p->nval in curfname in every case.
 */
void setfname(Cell *p)
{
        if (isarr(p)) {
                SYNTAX("%s is an array, not a function", p->nval);
        } else if (isfcn(p)) {
                SYNTAX("you can't define function %s more than once", p->nval);
        }
        curfname = p->nval;
}

/*
 * Return nonzero when p is a value node (per isvalue) whose first
 * argument is a Cell with csub == CCON.
 */
int constnode(Node *p)
{
        return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the sval field of the Cell stored in p->narg[0]. */
char *strnode(Node *p)
{
        return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Make n usable as a condition: comparison and boolean nodes are returned
 * unchanged, anything else is wrapped as (n != nullnode).
 */
Node *notnull(Node *n)
{
        switch (n->nobj) {
        case LE: case LT: case EQ: case NE: case GT: case GE:
        case BOR: case AND: case NOT:
                return n;
        default:
                return op2(NE, n, nullnode);
        }
}

/*
 * Check if cp's name already appears in the list vl; report a syntax
 * error on the first match and stop looking.
 */
void checkdup(Node *vl, Cell *cp)       /* check if name already in list */
{
        char *s = cp->nval;

        for ( ; vl; vl = vl->nnext) {
                if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
                        SYNTAX("duplicate argument %s", s);
                        break;
                }
        }
}