Home | History | Annotate | Download | only in one-true-awk
      1 /****************************************************************
      2 Copyright (C) Lucent Technologies 1997
      3 All Rights Reserved
      4 
      5 Permission to use, copy, modify, and distribute this software and
      6 its documentation for any purpose and without fee is hereby
      7 granted, provided that the above copyright notice appear in all
      8 copies and that both that the copyright notice and this
      9 permission notice and warranty disclaimer appear in supporting
     10 documentation, and that the name Lucent Technologies or any of
     11 its entities not be used in advertising or publicity pertaining
     12 to distribution of the software without specific, written prior
     13 permission.
     14 
     15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
     16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
     17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
     18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
     20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
     21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
     22 THIS SOFTWARE.
     23 ****************************************************************/
     24 
     25 %{
        /* C prologue for the awk grammar: headers, one forward declaration,
         * and the file-scope state that the grammar actions below read and
         * update while a program is being parsed. */
     26 #include <stdio.h>
     27 #include <string.h>
     28 #include "awk.h"
     29 
        /* checkdup is defined after the grammar but called from the varlist rule. */
     30 void checkdup(Node *list, Cell *item);
        /* yywrap unconditionally returns 1. */
     31 int yywrap(void) { return(1); }
     32 
        /* beginloc/endloc accumulate the bodies of XBEGIN/XEND blocks (joined
         * with linkum in pa_stat) and are handed to stat3(PROGRAM, ...) in the
         * program rule. */
     33 Node	*beginloc = 0;
     34 Node	*endloc = 0;
     35 int	infunc	= 0;	/* = 1 if in arglist or body of func */
     36 int	inloop	= 0;	/* = 1 if in while, for, do */
     37 char	*curfname = 0;	/* current function name */
     38 Node	*arglist = 0;	/* list of args for current function */
     39 %}
     40 
     41 %union {
        /* Semantic value carried by tokens and nonterminals; the <tag> in the
         * %token/%type declarations selects one of these members. */
     42 	Node	*p;
     43 	Cell	*cp;
     44 	int	i;
     45 	char	*s;
     46 }
     47 
     48 %token	<i>	FIRSTTOKEN	/* must be first */
        /* Token declarations.  The tag after %token names the %union member
         * holding the token's value: <i> int, <p> Node *, <cp> Cell *,
         * <s> char *.  Several of these names never appear in a grammar rule
         * and are only used as node/operator codes inside actions (e.g.
         * PROGRAM, PASTAT, INTEST, CONDEXPR, POSTINCR, DIVEQ). */
     49 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
     50 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
     51 %token	<i>	ARRAY
     52 %token	<i>	MATCH NOTMATCH MATCHOP
     53 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
     54 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
     55 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
     56 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
     57 %token	<i>	ADD MINUS MULT DIVIDE MOD
     58 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
     59 %token	<i>	PRINT PRINTF SPRINTF
     60 %token	<p>	ELSE INTEST CONDEXPR
     61 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
     62 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
     63 %token	<s>	REGEXPR
     64 
     65 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
        /* Nonterminal value types.  Everything that builds a parse-tree node
         * is <p> (Node *); reg_expr carries the raw regular-expression text
         * as <s>; the remaining punctuation/keyword wrappers are <i>. */
     66 %type	<p>	pa_pat pa_stat pa_stats
     67 %type	<s>	reg_expr
     68 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
     69 %type	<p>	var varname funcname varlist
     70 %type	<p>	for if else while
     71 %type	<i>	do st
     72 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
     73 %type	<i>	subop print
     74 
     75 %right	ASGNOP
        /* Precedence and associativity table.  In yacc, later lines bind more
         * tightly than earlier ones, so assignment is the loosest operator
         * here and INDIRECT the tightest.  CAT, NOT and UMINUS are referenced
         * from rules through %prec.  The order of these lines is significant;
         * do not reorder them. */
     76 %right	'?'
     77 %right	':'
     78 %left	BOR
     79 %left	AND
     80 %left	GETLINE
     81 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
     82 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
     83 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
     84 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
     85 %left	REGEXPR VAR VARNF IVAR WHILE '('
     86 %left	CAT
     87 %left	'+' '-'
     88 %left	'*' '/' '%'
     89 %left	NOT UMINUS
     90 %right	POWER
     91 %right	DECR INCR
     92 %left	INDIRECT
     93 %token	LASTTOKEN	/* must be last */
     94 
     95 %%
     96 
     97 program:
        /* Start symbol.  If no error has been flagged (errorflag == 0), build
         * the PROGRAM node from beginloc, the pattern-action list and endloc
         * and store it in winner.  On a parse error: drop the lookahead token
         * (yyclearin), call bracecheck() and report "bailing out". */
     98 	  pas	{ if (errorflag==0)
     99 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
    100 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
    101 	;
    102 
    103 and:
        /* and, bor, comma, do and else each accept their token followed by
         * any number of NL tokens, so a line break is allowed after them. */
    104 	  AND | and NL
    105 	;
    106 
    107 bor:
    108 	  BOR | bor NL
    109 	;
    110 
    111 comma:
    112 	  ',' | comma NL
    113 	;
    114 
    115 do:
    116 	  DO | do NL
    117 	;
    118 
    119 else:
    120 	  ELSE | else NL
    121 	;
    122 
    123 for:
        /* Three forms: for (init; cond; step) body, for (init; ; step) body
         * (condition passed as NIL), and for (var in array) body.
         * The mid-rule action {inloop++;} counts as a right-hand-side symbol,
         * which is why the body is $12, $10 and $8 respectively; inloop is
         * decremented again once the body has been parsed. */
    124 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    125 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
    126 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    127 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
    128 	| FOR '(' varname IN varname rparen {inloop++;} stmt
    129 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
    130 	;
    131 
    132 funcname:
        /* Name in a function definition: either token is passed to setfname().
         * Neither action assigns $$; the pa_stat rule casts this symbol's
         * value back to Cell * when it calls defn(). */
    133 	  VAR	{ setfname($1); }
    134 	| CALL	{ setfname($1); }
    135 	;
    136 
    137 if:
        /* if-header: the value is the condition wrapped by notnull(). */
    138 	  IF '(' pattern rparen		{ $$ = notnull($3); }
    139 	;
    140 
    141 lbrace:
        /* '{' followed by any number of NL tokens. */
    142 	  '{' | lbrace NL
    143 	;
    144 
    145 nl:
        /* One or more NL tokens. */
    146 	  NL | nl NL
    147 	;
    148 
    149 opt_nl:
        /* Zero or more NL tokens; the empty case yields 0. */
    150 	  /* empty */	{ $$ = 0; }
    151 	| nl
    152 	;
    153 
    154 opt_pst:
        /* Optional run of statement terminators (see pst); empty yields 0. */
    155 	  /* empty */	{ $$ = 0; }
    156 	| pst
    157 	;
    158 
    159 
    160 opt_simple_stmt:
        /* Optional simple statement (used for the init and step parts of
         * for); empty yields 0. */
    161 	  /* empty */			{ $$ = 0; }
    162 	| simple_stmt
    163 	;
    164 
    165 pas:
        /* The whole list of pattern-action statements, with optional
         * terminators on either side.  With no statements the value is 0. */
    166 	  opt_pst			{ $$ = 0; }
    167 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
    168 	;
    169 
    170 pa_pat:
        /* Pattern part of a pattern-action statement, wrapped by notnull(). */
    171 	  pattern	{ $$ = notnull($1); }
    172 	;
    173 
    174 pa_stat:
        /* One top-level item of an awk program:
         *   pattern                      -> PASTAT with a PRINT of rectonode()
         *   pattern { stmts }            -> PASTAT with the given body
         *   pat1, pat2 [ { stmts } ]     -> range form built by pa2stat()
         *   { stmts }                    -> PASTAT with a NIL pattern
         *   XBEGIN / XEND { stmts }      -> body appended to beginloc/endloc,
         *                                   value 0
         *   FUNC name ( args ) { stmts } -> registered through defn(), value 0
         * In the FUNC alternative the mid-rule {infunc++;} occupies position
         * 6, so the body is $8; infunc and curfname are reset afterwards. */
    175 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
    176 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
    177 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
    178 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
    179 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
    180 	| XBEGIN lbrace stmtlist '}'
    181 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
    182 	| XEND lbrace stmtlist '}'
    183 		{ endloc = linkum(endloc, $3); $$ = 0; }
    184 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
    185 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
    186 	;
    187 
    188 pa_stats:
        /* One or more pa_stat items, optionally separated by terminators,
         * chained together with linkum(). */
    189 	  pa_stat
    190 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
    191 	;
    192 
    193 patlist:
        /* Comma-separated list of one or more patterns, chained with linkum(). */
    194 	  pattern
    195 	| patlist comma pattern		{ $$ = linkum($1, $3); }
    196 	;
    197 
    198 ppattern:
        /* Expression form used for unparenthesised print arguments (reached
         * through pplist in prarg).  It mirrors pattern but has no relational
         * operator (EQ, GE, GT, LE, LT, NE) and no '|' GETLINE alternatives --
         * presumably so that '>' and '|' after print arguments are parsed as
         * redirection by simple_stmt (TODO confirm).
         * For MATCHOP: a literal regular expression, or a right operand that
         * constnode() reports as constant, is compiled with makedfa() inside
         * the action and the first op3 argument is NIL; otherwise the operand
         * node itself is passed and the first argument is (Node *)1. */
    199 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
    200 	| ppattern '?' ppattern ':' ppattern %prec '?'
    201 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    202 	| ppattern bor ppattern %prec BOR
    203 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    204 	| ppattern and ppattern %prec AND
    205 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    206 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    207 	| ppattern MATCHOP ppattern
    208 		{ if (constnode($3))
    209 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    210 		  else
    211 			$$ = op3($2, (Node *)1, $1, $3); }
    212 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    213 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    214 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
    215 	| re
    216 	| term
    217 	;
    218 
    219 pattern:
        /* General expression.  Alternatives, in order: assignment, ?:, ||,
         * &&, the six relational operators, regular-expression match, array
         * membership (single and parenthesised multi-subscript), cmd | getline
         * with and without a target variable, concatenation (two adjacent
         * operands, precedence CAT), a bare regular expression, and term.
         * Operands of ?:, || and && that act as conditions go through
         * notnull().
         * For MATCHOP the constant/non-constant split is the same as in
         * ppattern: NIL plus a makedfa() result for constants, (Node *)1 plus
         * the operand node otherwise.
         * When the global safe is set, the two '|' GETLINE alternatives only
         * report a syntax error and do not assign $$. */
    220 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
    221 	| pattern '?' pattern ':' pattern %prec '?'
    222 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    223 	| pattern bor pattern %prec BOR
    224 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    225 	| pattern and pattern %prec AND
    226 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    227 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
    228 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
    229 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
    230 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
    231 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
    232 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
    233 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    234 	| pattern MATCHOP pattern
    235 		{ if (constnode($3))
    236 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    237 		  else
    238 			$$ = op3($2, (Node *)1, $1, $3); }
    239 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    240 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    241 	| pattern '|' GETLINE var	{
    242 			if (safe) SYNTAX("cmd | getline is unsafe");
    243 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
    244 	| pattern '|' GETLINE		{
    245 			if (safe) SYNTAX("cmd | getline is unsafe");
    246 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
    247 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
    248 	| re
    249 	| term
    250 	;
    251 
    252 plist:
        /* Comma-separated list of at least two patterns, chained with
         * linkum(); used inside parentheses by prarg and the IN tests. */
    253 	  pattern comma pattern		{ $$ = linkum($1, $3); }
    254 	| plist comma pattern		{ $$ = linkum($1, $3); }
    255 	;
    256 
    257 pplist:
        /* Comma-separated list of one or more ppatterns. */
    258 	  ppattern
    259 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
    260 	;
    261 
    262 prarg:
        /* Argument list of print/printf: nothing (value is rectonode()),
         * an unparenthesised pplist, or a parenthesised plist. */
    263 	  /* empty */			{ $$ = rectonode(); }
    264 	| pplist
    265 	| '(' plist ')'			{ $$ = $2; }
    266 	;
    267 
    268 print:
        /* Either output keyword; the token value is what simple_stmt passes
         * to stat3() as the statement type. */
    269 	  PRINT | PRINTF
    270 	;
    271 
    272 pst:
        /* One or more statement terminators: any mix of NL and ';'. */
    273 	  NL | ';' | pst NL | pst ';'
    274 	;
    275 
    276 rbrace:
        /* '}' followed by any number of NL tokens. */
    277 	  '}' | rbrace NL
    278 	;
    279 
    280 re:
        /* A regular expression used as an expression on its own: built as a
         * MATCH of rectonode() against the compiled expression.  NOT re
         * negates the notnull()-wrapped operand. */
    281 	   reg_expr
    282 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
    283 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
    284 	;
    285 
    286 reg_expr:
        /* Regular-expression literal.  The mid-rule action calls startreg()
         * right after the opening '/', before the REGEXPR token is read; it
         * occupies position 2, so the REGEXPR text is $3. */
    287 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
    288 	;
    289 
    290 rparen:
        /* ')' followed by any number of NL tokens. */
    291 	  ')' | rparen NL
    292 	;
    293 
    294 simple_stmt:
        /* Statements without their terminator: print/printf with optional
         * redirection ('|', APPEND or GT followed by a term), delete of one
         * array element or of a whole array (subscript list passed as 0), or
         * an expression converted by exptostat().
         * When the global safe is set, the three redirected print forms only
         * report a syntax error and do not assign $$.
         * The error alternative drops the lookahead (yyclearin) and reports
         * "illegal statement". */
    295 	  print prarg '|' term		{
    296 			if (safe) SYNTAX("print | is unsafe");
    297 			else $$ = stat3($1, $2, itonp($3), $4); }
    298 	| print prarg APPEND term	{
    299 			if (safe) SYNTAX("print >> is unsafe");
    300 			else $$ = stat3($1, $2, itonp($3), $4); }
    301 	| print prarg GT term		{
    302 			if (safe) SYNTAX("print > is unsafe");
    303 			else $$ = stat3($1, $2, itonp($3), $4); }
    304 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
    305 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
    306 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
    307 	| pattern			{ $$ = exptostat($1); }
    308 	| error				{ yyclearin; SYNTAX("illegal statement"); }
    309 	;
    310 
    311 st:
        /* Statement terminator: one or more NL, or ';' with optional NLs. */
    312 	  nl
    313 	| ';' opt_nl
    314 	;
    315 
    316 stmt:
        /* A complete statement.
         * - BREAK/CONTINUE report a syntax error when inloop is zero but still
         *   build their node.
         * - do-while: the two mid-rule actions bracket the body with
         *   inloop++ / --inloop; counting them, the body is $3 and the
         *   condition is $7.
         * - NEXT/NEXTFILE report a syntax error when infunc is non-zero but
         *   still build their node.
         * - "simple_stmt st" has no action, so the statement takes the
         *   default value $1.
         * - while: the mid-rule {inloop++;} is position 2, so the body is $3.
         * - A lone ';' is an empty statement with value 0. */
    317 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
    318 				  $$ = stat1(BREAK, NIL); }
    319 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
    320 				  $$ = stat1(CONTINUE, NIL); }
    321 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
    322 		{ $$ = stat2(DO, $3, notnull($7)); }
    323 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
    324 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
    325 	| for
    326 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
    327 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
    328 	| lbrace stmtlist rbrace { $$ = $2; }
    329 	| NEXT st	{ if (infunc)
    330 				SYNTAX("next is illegal inside a function");
    331 			  $$ = stat1(NEXT, NIL); }
    332 	| NEXTFILE st	{ if (infunc)
    333 				SYNTAX("nextfile is illegal inside a function");
    334 			  $$ = stat1(NEXTFILE, NIL); }
    335 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
    336 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
    337 	| simple_stmt st
    338 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
    339 	| ';' opt_nl		{ $$ = 0; }
    340 	;
    341 
    342 stmtlist:
        /* One or more statements, chained with linkum(). */
    343 	  stmt
    344 	| stmtlist stmt		{ $$ = linkum($1, $2); }
    345 	;
    346 
    347 subop:
        /* Either substitution keyword; the token value is passed to op4() as
         * the node type in term. */
    348 	  SUB | GSUB
    349 	;
    350 
    351 term:
        /* Arithmetic and primary expressions.
         * - term '/' ASGNOP term builds a DIVEQ node; presumably this covers a
         *   divide-assign that reaches the parser as '/' followed by ASGNOP
         *   (TODO confirm against the lexer).
         * - Unary '+' returns its operand unchanged; unary '-' and NOT take
         *   the precedence of UMINUS.
         * - BLTIN with no argument list, or with an empty one, is given
         *   rectonode() as its argument.
         * - INDEX with a regular-expression second argument reports a syntax
         *   error but still builds a node, casting the regexp text to Node *.
         * - MATCHFCN and subop compile a literal or constant (constnode())
         *   regular expression with makedfa(..., 1) and pass NIL as the first
         *   operand; a non-constant expression is passed as a node with
         *   (Node *)1 as the first operand.
         * - SPLIT records the separator kind in its last operand by casting
         *   the STRING or REGEXPR token code to Node *.
         * - Two-argument subop forms use rectonode() as the target; the
         *   three-argument forms use the given var ($7). */
    352  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
    353  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
    354 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
    355 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
    356 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
    357 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
    358 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
    359 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
    360 	| '+' term %prec UMINUS		{ $$ = $2; }
    361 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
    362 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
    363 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
    364 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
    365 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
    366 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
    367 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
    368 	| DECR var			{ $$ = op1(PREDECR, $2); }
    369 	| INCR var			{ $$ = op1(PREINCR, $2); }
    370 	| var DECR			{ $$ = op1(POSTDECR, $1); }
    371 	| var INCR			{ $$ = op1(POSTINCR, $1); }
    372 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
    373 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
    374 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
    375 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
    376 	| INDEX '(' pattern comma pattern ')'
    377 		{ $$ = op2(INDEX, $3, $5); }
    378 	| INDEX '(' pattern comma reg_expr ')'
    379 		{ SYNTAX("index() doesn't permit regular expressions");
    380 		  $$ = op2(INDEX, $3, (Node*)$5); }
    381 	| '(' pattern ')'		{ $$ = $2; }
    382 	| MATCHFCN '(' pattern comma reg_expr ')'
    383 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
    384 	| MATCHFCN '(' pattern comma pattern ')'
    385 		{ if (constnode($5))
    386 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
    387 		  else
    388 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
    389 	| NUMBER			{ $$ = celltonode($1, CCON); }
    390 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
    391 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
    392 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
    393 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
    394 	| SPLIT '(' pattern comma varname ')'
    395 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
    396 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
    397 	| STRING	 		{ $$ = celltonode($1, CCON); }
    398 	| subop '(' reg_expr comma pattern ')'
    399 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
    400 	| subop '(' pattern comma pattern ')'
    401 		{ if (constnode($3))
    402 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
    403 		  else
    404 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
    405 	| subop '(' reg_expr comma pattern comma var ')'
    406 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
    407 	| subop '(' pattern comma pattern comma var ')'
    408 		{ if (constnode($3))
    409 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
    410 		  else
    411 			$$ = op4($1, (Node *)1, $3, $5, $7); }
    412 	| SUBSTR '(' pattern comma pattern comma pattern ')'
    413 		{ $$ = op3(SUBSTR, $3, $5, $7); }
    414 	| SUBSTR '(' pattern comma pattern ')'
    415 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
    416 	| var
    417 	;
    418 
    419 var:
        /* Something that can be assigned to: a plain name, a subscripted
         * array element (ARRAY node over makearr() of the name), an IVAR
         * token, or INDIRECT applied to a term.  Both of the last two build
         * an INDIRECT node. */
    420 	  varname
    421 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
    422 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
    423 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
    424 	;
    425 
    426 varlist:
        /* Parameter list of a function definition.  Every alternative also
         * stores the list built so far in the global arglist.  Before a
         * further name is appended, checkdup() is called to report a name
         * that is already in the list. */
    427 	  /* nothing */		{ arglist = $$ = 0; }
    428 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
    429 	| varlist comma VAR	{
    430 			checkdup($1, $3);
    431 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
    432 	;
    433 
    434 varname:
        /* A simple name: a VAR cell wrapped by celltonode(), an ARG node
         * carrying the token's integer value, or a VARNF node carrying the
         * token's cell cast to Node *. */
    435 	  VAR			{ $$ = celltonode($1, CVAR); }
    436 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
    437 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
    438 	;
    439 
    440 
    441 while:
        /* while-header: the value is the condition wrapped by notnull(). */
    442 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
    443 	;
    444 
    445 %%
    446 
    447 void setfname(Cell *p)
    448 {
    449 	if (isarr(p))
    450 		SYNTAX("%s is an array, not a function", p->nval);
    451 	else if (isfcn(p))
    452 		SYNTAX("you can't define function %s more than once", p->nval);
    453 	curfname = p->nval;
    454 }
    455 
    456 int constnode(Node *p)
    457 {
    458 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
    459 }
    460 
    461 char *strnode(Node *p)
    462 {
    463 	return ((Cell *)(p->narg[0]))->sval;
    464 }
    465 
    466 Node *notnull(Node *n)
    467 {
    468 	switch (n->nobj) {
    469 	case LE: case LT: case EQ: case NE: case GT: case GE:
    470 	case BOR: case AND: case NOT:
    471 		return n;
    472 	default:
    473 		return op2(NE, n, nullnode);
    474 	}
    475 }
    476 
    477 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
    478 {
    479 	char *s = cp->nval;
    480 	for ( ; vl; vl = vl->nnext) {
    481 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
    482 			SYNTAX("duplicate argument %s", s);
    483 			break;
    484 		}
    485 	}
    486 }
    487