1 /* 2 ** $Id: llex.c,v 2.63.1.2 2013/08/30 15:49:41 roberto Exp $ 3 ** Lexical Analyzer 4 ** See Copyright Notice in lua.h 5 */ 6 7 8 #ifndef SYSLINUX 9 #include <locale.h> 10 #else 11 #define getlocaledecpoint() '.' 12 #endif 13 #include <string.h> 14 15 #define llex_c 16 #define LUA_CORE 17 18 #include "lua.h" 19 20 #include "lctype.h" 21 #include "ldo.h" 22 #include "llex.h" 23 #include "lobject.h" 24 #include "lparser.h" 25 #include "lstate.h" 26 #include "lstring.h" 27 #include "ltable.h" 28 #include "lzio.h" 29 30 31 32 #define next(ls) (ls->current = zgetc(ls->z)) 33 34 35 36 #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') 37 38 39 /* ORDER RESERVED */ 40 static const char *const luaX_tokens [] = { 41 "and", "break", "do", "else", "elseif", 42 "end", "false", "for", "function", "goto", "if", 43 "in", "local", "nil", "not", "or", "repeat", 44 "return", "then", "true", "until", "while", 45 "..", "...", "==", ">=", "<=", "~=", "::", "<eof>", 46 "<number>", "<name>", "<string>" 47 }; 48 49 50 #define save_and_next(ls) (save(ls, ls->current), next(ls)) 51 52 53 static l_noret lexerror (LexState *ls, const char *msg, int token); 54 55 56 static void save (LexState *ls, int c) { 57 Mbuffer *b = ls->buff; 58 if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { 59 size_t newsize; 60 if (luaZ_sizebuffer(b) >= MAX_SIZET/2) 61 lexerror(ls, "lexical element too long", 0); 62 newsize = luaZ_sizebuffer(b) * 2; 63 luaZ_resizebuffer(ls->L, b, newsize); 64 } 65 b->buffer[luaZ_bufflen(b)++] = cast(char, c); 66 } 67 68 69 void luaX_init (lua_State *L) { 70 int i; 71 for (i=0; i<NUM_RESERVED; i++) { 72 TString *ts = luaS_new(L, luaX_tokens[i]); 73 luaS_fix(ts); /* reserved words are never collected */ 74 ts->tsv.extra = cast_byte(i+1); /* reserved word */ 75 } 76 } 77 78 79 const char *luaX_token2str (LexState *ls, int token) { 80 if (token < FIRST_RESERVED) { /* single-byte symbols? */ 81 lua_assert(token == cast(unsigned char, token)); 82 return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) : 83 luaO_pushfstring(ls->L, "char(%d)", token); 84 } 85 else { 86 const char *s = luaX_tokens[token - FIRST_RESERVED]; 87 if (token < TK_EOS) /* fixed format (symbols and reserved words)? */ 88 return luaO_pushfstring(ls->L, LUA_QS, s); 89 else /* names, strings, and numerals */ 90 return s; 91 } 92 } 93 94 95 static const char *txtToken (LexState *ls, int token) { 96 switch (token) { 97 case TK_NAME: 98 case TK_STRING: 99 case TK_NUMBER: 100 save(ls, '\0'); 101 return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff)); 102 default: 103 return luaX_token2str(ls, token); 104 } 105 } 106 107 108 static l_noret lexerror (LexState *ls, const char *msg, int token) { 109 char buff[LUA_IDSIZE]; 110 luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE); 111 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); 112 if (token) 113 luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); 114 luaD_throw(ls->L, LUA_ERRSYNTAX); 115 } 116 117 118 l_noret luaX_syntaxerror (LexState *ls, const char *msg) { 119 lexerror(ls, msg, ls->t.token); 120 } 121 122 123 /* 124 ** creates a new string and anchors it in function's table so that 125 ** it will not be collected until the end of the function's compilation 126 ** (by that time it should be anchored in function's prototype) 127 */ 128 TString *luaX_newstring (LexState *ls, const char *str, size_t l) { 129 lua_State *L = ls->L; 130 TValue *o; /* entry for `str' */ 131 TString *ts = luaS_newlstr(L, str, l); /* create new string */ 132 setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ 133 o = luaH_set(L, ls->fs->h, L->top - 1); 134 if (ttisnil(o)) { /* not in use yet? (see 'addK') */ 135 /* boolean value does not need GC barrier; 136 table has no metatable, so it does not need to invalidate cache */ 137 setbvalue(o, 1); /* t[string] = true */ 138 luaC_checkGC(L); 139 } 140 else { /* string already present */ 141 ts = rawtsvalue(keyfromval(o)); /* re-use value previously stored */ 142 } 143 L->top--; /* remove string from stack */ 144 return ts; 145 } 146 147 148 /* 149 ** increment line number and skips newline sequence (any of 150 ** \n, \r, \n\r, or \r\n) 151 */ 152 static void inclinenumber (LexState *ls) { 153 int old = ls->current; 154 lua_assert(currIsNewline(ls)); 155 next(ls); /* skip `\n' or `\r' */ 156 if (currIsNewline(ls) && ls->current != old) 157 next(ls); /* skip `\n\r' or `\r\n' */ 158 if (++ls->linenumber >= MAX_INT) 159 luaX_syntaxerror(ls, "chunk has too many lines"); 160 } 161 162 163 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, 164 int firstchar) { 165 ls->decpoint = '.'; 166 ls->L = L; 167 ls->current = firstchar; 168 ls->lookahead.token = TK_EOS; /* no look-ahead token */ 169 ls->z = z; 170 ls->fs = NULL; 171 ls->linenumber = 1; 172 ls->lastline = 1; 173 ls->source = source; 174 ls->envn = luaS_new(L, LUA_ENV); /* create env name */ 175 luaS_fix(ls->envn); /* never collect this name */ 176 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ 177 } 178 179 180 181 /* 182 ** ======================================================= 183 ** LEXICAL ANALYZER 184 ** ======================================================= 185 */ 186 187 188 189 static int check_next (LexState *ls, const char *set) { 190 if (ls->current == '\0' || !strchr(set, ls->current)) 191 return 0; 192 save_and_next(ls); 193 return 1; 194 } 195 196 197 /* 198 ** change all characters 'from' in buffer to 'to' 199 */ 200 static void buffreplace (LexState *ls, char from, char to) { 201 size_t n = luaZ_bufflen(ls->buff); 202 char *p = luaZ_buffer(ls->buff); 203 while (n--) 204 if (p[n] == from) p[n] = to; 205 } 206 207 208 #if !defined(getlocaledecpoint) 209 #define getlocaledecpoint() (localeconv()->decimal_point[0]) 210 #endif 211 212 213 #define buff2d(b,e) luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e) 214 215 /* 216 ** in case of format error, try to change decimal point separator to 217 ** the one defined in the current locale and check again 218 */ 219 static void trydecpoint (LexState *ls, SemInfo *seminfo) { 220 char old = ls->decpoint; 221 ls->decpoint = getlocaledecpoint(); 222 buffreplace(ls, old, ls->decpoint); /* try new decimal separator */ 223 if (!buff2d(ls->buff, &seminfo->r)) { 224 /* format error with correct decimal point: no more options */ 225 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ 226 lexerror(ls, "malformed number", TK_NUMBER); 227 } 228 } 229 230 231 /* LUA_NUMBER */ 232 /* 233 ** this function is quite liberal in what it accepts, as 'luaO_str2d' 234 ** will reject ill-formed numerals. 235 */ 236 static void read_numeral (LexState *ls, SemInfo *seminfo) { 237 const char *expo = "Ee"; 238 int first = ls->current; 239 lua_assert(lisdigit(ls->current)); 240 save_and_next(ls); 241 if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */ 242 expo = "Pp"; 243 for (;;) { 244 if (check_next(ls, expo)) /* exponent part? */ 245 check_next(ls, "+-"); /* optional exponent sign */ 246 if (lisxdigit(ls->current) || ls->current == '.') 247 save_and_next(ls); 248 else break; 249 } 250 save(ls, '\0'); 251 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ 252 if (!buff2d(ls->buff, &seminfo->r)) /* format error? */ 253 trydecpoint(ls, seminfo); /* try to update decimal point separator */ 254 } 255 256 257 /* 258 ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or 259 ** -1 if sequence is malformed 260 */ 261 static int skip_sep (LexState *ls) { 262 int count = 0; 263 int s = ls->current; 264 lua_assert(s == '[' || s == ']'); 265 save_and_next(ls); 266 while (ls->current == '=') { 267 save_and_next(ls); 268 count++; 269 } 270 return (ls->current == s) ? count : (-count) - 1; 271 } 272 273 274 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { 275 save_and_next(ls); /* skip 2nd `[' */ 276 if (currIsNewline(ls)) /* string starts with a newline? */ 277 inclinenumber(ls); /* skip it */ 278 for (;;) { 279 switch (ls->current) { 280 case EOZ: 281 lexerror(ls, (seminfo) ? "unfinished long string" : 282 "unfinished long comment", TK_EOS); 283 break; /* to avoid warnings */ 284 case ']': { 285 if (skip_sep(ls) == sep) { 286 save_and_next(ls); /* skip 2nd `]' */ 287 goto endloop; 288 } 289 break; 290 } 291 case '\n': case '\r': { 292 save(ls, '\n'); 293 inclinenumber(ls); 294 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ 295 break; 296 } 297 default: { 298 if (seminfo) save_and_next(ls); 299 else next(ls); 300 } 301 } 302 } endloop: 303 if (seminfo) 304 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), 305 luaZ_bufflen(ls->buff) - 2*(2 + sep)); 306 } 307 308 309 static void escerror (LexState *ls, int *c, int n, const char *msg) { 310 int i; 311 luaZ_resetbuffer(ls->buff); /* prepare error message */ 312 save(ls, '\\'); 313 for (i = 0; i < n && c[i] != EOZ; i++) 314 save(ls, c[i]); 315 lexerror(ls, msg, TK_STRING); 316 } 317 318 319 static int readhexaesc (LexState *ls) { 320 int c[3], i; /* keep input for error message */ 321 int r = 0; /* result accumulator */ 322 c[0] = 'x'; /* for error message */ 323 for (i = 1; i < 3; i++) { /* read two hexadecimal digits */ 324 c[i] = next(ls); 325 if (!lisxdigit(c[i])) 326 escerror(ls, c, i + 1, "hexadecimal digit expected"); 327 r = (r << 4) + luaO_hexavalue(c[i]); 328 } 329 return r; 330 } 331 332 333 static int readdecesc (LexState *ls) { 334 int c[3], i; 335 int r = 0; /* result accumulator */ 336 for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ 337 c[i] = ls->current; 338 r = 10*r + c[i] - '0'; 339 next(ls); 340 } 341 if (r > UCHAR_MAX) 342 escerror(ls, c, i, "decimal escape too large"); 343 return r; 344 } 345 346 347 static void read_string (LexState *ls, int del, SemInfo *seminfo) { 348 save_and_next(ls); /* keep delimiter (for error messages) */ 349 while (ls->current != del) { 350 switch (ls->current) { 351 case EOZ: 352 lexerror(ls, "unfinished string", TK_EOS); 353 break; /* to avoid warnings */ 354 case '\n': 355 case '\r': 356 lexerror(ls, "unfinished string", TK_STRING); 357 break; /* to avoid warnings */ 358 case '\\': { /* escape sequences */ 359 int c; /* final character to be saved */ 360 next(ls); /* do not save the `\' */ 361 switch (ls->current) { 362 case 'a': c = '\a'; goto read_save; 363 case 'b': c = '\b'; goto read_save; 364 case 'f': c = '\f'; goto read_save; 365 case 'n': c = '\n'; goto read_save; 366 case 'r': c = '\r'; goto read_save; 367 case 't': c = '\t'; goto read_save; 368 case 'v': c = '\v'; goto read_save; 369 case 'x': c = readhexaesc(ls); goto read_save; 370 case '\n': case '\r': 371 inclinenumber(ls); c = '\n'; goto only_save; 372 case '\\': case '\"': case '\'': 373 c = ls->current; goto read_save; 374 case EOZ: goto no_save; /* will raise an error next loop */ 375 case 'z': { /* zap following span of spaces */ 376 next(ls); /* skip the 'z' */ 377 while (lisspace(ls->current)) { 378 if (currIsNewline(ls)) inclinenumber(ls); 379 else next(ls); 380 } 381 goto no_save; 382 } 383 default: { 384 if (!lisdigit(ls->current)) 385 escerror(ls, &ls->current, 1, "invalid escape sequence"); 386 /* digital escape \ddd */ 387 c = readdecesc(ls); 388 goto only_save; 389 } 390 } 391 read_save: next(ls); /* read next character */ 392 only_save: save(ls, c); /* save 'c' */ 393 no_save: break; 394 } 395 default: 396 save_and_next(ls); 397 } 398 } 399 save_and_next(ls); /* skip delimiter */ 400 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, 401 luaZ_bufflen(ls->buff) - 2); 402 } 403 404 405 static int llex (LexState *ls, SemInfo *seminfo) { 406 luaZ_resetbuffer(ls->buff); 407 for (;;) { 408 switch (ls->current) { 409 case '\n': case '\r': { /* line breaks */ 410 inclinenumber(ls); 411 break; 412 } 413 case ' ': case '\f': case '\t': case '\v': { /* spaces */ 414 next(ls); 415 break; 416 } 417 case '-': { /* '-' or '--' (comment) */ 418 next(ls); 419 if (ls->current != '-') return '-'; 420 /* else is a comment */ 421 next(ls); 422 if (ls->current == '[') { /* long comment? */ 423 int sep = skip_sep(ls); 424 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ 425 if (sep >= 0) { 426 read_long_string(ls, NULL, sep); /* skip long comment */ 427 luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ 428 break; 429 } 430 } 431 /* else short comment */ 432 while (!currIsNewline(ls) && ls->current != EOZ) 433 next(ls); /* skip until end of line (or end of file) */ 434 break; 435 } 436 case '[': { /* long string or simply '[' */ 437 int sep = skip_sep(ls); 438 if (sep >= 0) { 439 read_long_string(ls, seminfo, sep); 440 return TK_STRING; 441 } 442 else if (sep == -1) return '['; 443 else lexerror(ls, "invalid long string delimiter", TK_STRING); 444 } 445 case '=': { 446 next(ls); 447 if (ls->current != '=') return '='; 448 else { next(ls); return TK_EQ; } 449 } 450 case '<': { 451 next(ls); 452 if (ls->current != '=') return '<'; 453 else { next(ls); return TK_LE; } 454 } 455 case '>': { 456 next(ls); 457 if (ls->current != '=') return '>'; 458 else { next(ls); return TK_GE; } 459 } 460 case '~': { 461 next(ls); 462 if (ls->current != '=') return '~'; 463 else { next(ls); return TK_NE; } 464 } 465 case ':': { 466 next(ls); 467 if (ls->current != ':') return ':'; 468 else { next(ls); return TK_DBCOLON; } 469 } 470 case '"': case '\'': { /* short literal strings */ 471 read_string(ls, ls->current, seminfo); 472 return TK_STRING; 473 } 474 case '.': { /* '.', '..', '...', or number */ 475 save_and_next(ls); 476 if (check_next(ls, ".")) { 477 if (check_next(ls, ".")) 478 return TK_DOTS; /* '...' */ 479 else return TK_CONCAT; /* '..' */ 480 } 481 else if (!lisdigit(ls->current)) return '.'; 482 /* else go through */ 483 } 484 case '0': case '1': case '2': case '3': case '4': 485 case '5': case '6': case '7': case '8': case '9': { 486 read_numeral(ls, seminfo); 487 return TK_NUMBER; 488 } 489 case EOZ: { 490 return TK_EOS; 491 } 492 default: { 493 if (lislalpha(ls->current)) { /* identifier or reserved word? */ 494 TString *ts; 495 do { 496 save_and_next(ls); 497 } while (lislalnum(ls->current)); 498 ts = luaX_newstring(ls, luaZ_buffer(ls->buff), 499 luaZ_bufflen(ls->buff)); 500 seminfo->ts = ts; 501 if (isreserved(ts)) /* reserved word? */ 502 return ts->tsv.extra - 1 + FIRST_RESERVED; 503 else { 504 return TK_NAME; 505 } 506 } 507 else { /* single-char tokens (+ - / ...) */ 508 int c = ls->current; 509 next(ls); 510 return c; 511 } 512 } 513 } 514 } 515 } 516 517 518 void luaX_next (LexState *ls) { 519 ls->lastline = ls->linenumber; 520 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ 521 ls->t = ls->lookahead; /* use this one */ 522 ls->lookahead.token = TK_EOS; /* and discharge it */ 523 } 524 else 525 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ 526 } 527 528 529 int luaX_lookahead (LexState *ls) { 530 lua_assert(ls->lookahead.token == TK_EOS); 531 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); 532 return ls->lookahead.token; 533 } 534 535