Home | History | Annotate | Download | only in src
      1 /*
      2 ** $Id: llex.c,v 2.63 2013/03/16 21:10:18 roberto Exp $
      3 ** Lexical Analyzer
      4 ** See Copyright Notice in lua.h
      5 */
      6 
      7 
      8 #include <locale.h>
      9 #include <string.h>
     10 
     11 #define llex_c
     12 #define LUA_CORE
     13 
     14 #include "lua.h"
     15 
     16 #include "lctype.h"
     17 #include "ldo.h"
     18 #include "llex.h"
     19 #include "lobject.h"
     20 #include "lparser.h"
     21 #include "lstate.h"
     22 #include "lstring.h"
     23 #include "ltable.h"
     24 #include "lzio.h"
     25 
     26 
     27 
     28 #define next(ls) (ls->current = zgetc(ls->z))
     29 
     30 
     31 
     32 #define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
     33 
     34 
/*
** ORDER RESERVED
** Printable text of every multi-character token. The table is indexed by
** 'token - FIRST_RESERVED' (see 'luaX_token2str'), and its first
** NUM_RESERVED entries are the reserved words interned by 'luaX_init',
** so the order of the entries must not change independently of the
** token codes (presumably the RESERVED enumeration in llex.h -- confirm
** there before editing).
*/
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
    "<number>", "<name>", "<string>"
};
     44 
     45 
     46 #define save_and_next(ls) (save(ls, ls->current), next(ls))
     47 
     48 
     49 static l_noret lexerror (LexState *ls, const char *msg, int token);
     50 
     51 
     52 static void save (LexState *ls, int c) {
     53   Mbuffer *b = ls->buff;
     54   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
     55     size_t newsize;
     56     if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
     57       lexerror(ls, "lexical element too long", 0);
     58     newsize = luaZ_sizebuffer(b) * 2;
     59     luaZ_resizebuffer(ls->L, b, newsize);
     60   }
     61   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
     62 }
     63 
     64 
     65 void luaX_init (lua_State *L) {
     66   int i;
     67   for (i=0; i<NUM_RESERVED; i++) {
     68     TString *ts = luaS_new(L, luaX_tokens[i]);
     69     luaS_fix(ts);  /* reserved words are never collected */
     70     ts->tsv.extra = cast_byte(i+1);  /* reserved word */
     71   }
     72 }
     73 
     74 
     75 const char *luaX_token2str (LexState *ls, int token) {
     76   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
     77     lua_assert(token == cast(unsigned char, token));
     78     return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
     79                               luaO_pushfstring(ls->L, "char(%d)", token);
     80   }
     81   else {
     82     const char *s = luaX_tokens[token - FIRST_RESERVED];
     83     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
     84       return luaO_pushfstring(ls->L, LUA_QS, s);
     85     else  /* names, strings, and numerals */
     86       return s;
     87   }
     88 }
     89 
     90 
     91 static const char *txtToken (LexState *ls, int token) {
     92   switch (token) {
     93     case TK_NAME:
     94     case TK_STRING:
     95     case TK_NUMBER:
     96       save(ls, '\0');
     97       return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
     98     default:
     99       return luaX_token2str(ls, token);
    100   }
    101 }
    102 
    103 
    104 static l_noret lexerror (LexState *ls, const char *msg, int token) {
    105   char buff[LUA_IDSIZE];
    106   luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
    107   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
    108   if (token)
    109     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
    110   luaD_throw(ls->L, LUA_ERRSYNTAX);
    111 }
    112 
    113 
    114 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
    115   lexerror(ls, msg, ls->t.token);
    116 }
    117 
    118 
/*
** creates a new string and anchors it in function's table so that
** it will not be collected until the end of the function's compilation
** (by that time it should be anchored in function's prototype)
**
** 'str' points to 'l' bytes of text (not necessarily zero-terminated, as
** the length is passed explicitly). Returns the string object created by
** 'luaS_newlstr'. The stack top is the same on exit as on entry.
*/
TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  lua_State *L = ls->L;
  TValue *o;  /* entry for `str' */
  TString *ts = luaS_newlstr(L, str, l);  /* create new string */
  /* the string is pushed first so that it stays referenced while the
     table entry is created and the GC check below runs */
  setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
  o = luaH_set(L, ls->fs->h, L->top - 1);  /* slot keyed by the string */
  if (ttisnil(o)) {  /* not in use yet? (see 'addK') */
    /* boolean value does not need GC barrier;
       table has no metatable, so it does not need to invalidate cache */
    setbvalue(o, 1);  /* t[string] = true */
    luaC_checkGC(L);  /* only checked when a new entry was added */
  }
  L->top--;  /* remove string from stack */
  return ts;
}
    139 
    140 
    141 /*
    142 ** increment line number and skips newline sequence (any of
    143 ** \n, \r, \n\r, or \r\n)
    144 */
    145 static void inclinenumber (LexState *ls) {
    146   int old = ls->current;
    147   lua_assert(currIsNewline(ls));
    148   next(ls);  /* skip `\n' or `\r' */
    149   if (currIsNewline(ls) && ls->current != old)
    150     next(ls);  /* skip `\n\r' or `\r\n' */
    151   if (++ls->linenumber >= MAX_INT)
    152     luaX_syntaxerror(ls, "chunk has too many lines");
    153 }
    154 
    155 
    156 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
    157                     int firstchar) {
    158   ls->decpoint = '.';
    159   ls->L = L;
    160   ls->current = firstchar;
    161   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
    162   ls->z = z;
    163   ls->fs = NULL;
    164   ls->linenumber = 1;
    165   ls->lastline = 1;
    166   ls->source = source;
    167   ls->envn = luaS_new(L, LUA_ENV);  /* create env name */
    168   luaS_fix(ls->envn);  /* never collect this name */
    169   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
    170 }
    171 
    172 
    173 
    174 /*
    175 ** =======================================================
    176 ** LEXICAL ANALYZER
    177 ** =======================================================
    178 */
    179 
    180 
    181 
    182 static int check_next (LexState *ls, const char *set) {
    183   if (ls->current == '\0' || !strchr(set, ls->current))
    184     return 0;
    185   save_and_next(ls);
    186   return 1;
    187 }
    188 
    189 
    190 /*
    191 ** change all characters 'from' in buffer to 'to'
    192 */
    193 static void buffreplace (LexState *ls, char from, char to) {
    194   size_t n = luaZ_bufflen(ls->buff);
    195   char *p = luaZ_buffer(ls->buff);
    196   while (n--)
    197     if (p[n] == from) p[n] = to;
    198 }
    199 
    200 
/*
** First character of the current locale's decimal-point string. The
** definition is guarded so that a configuration may supply its own.
*/
#if !defined(getlocaledecpoint)
#define getlocaledecpoint()	(localeconv()->decimal_point[0])
#endif


/*
** Convert the contents of buffer 'b' to a number stored through 'e',
** using 'luaO_str2d'; the result is tested as a boolean by its callers
** (false meaning the conversion failed). The length passed is the buffer
** length minus one: callers save a terminating '\0' in the buffer before
** converting, and that byte is not part of the numeral.
*/
#define buff2d(b,e)	luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
    207 
    208 /*
    209 ** in case of format error, try to change decimal point separator to
    210 ** the one defined in the current locale and check again
    211 */
    212 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
    213   char old = ls->decpoint;
    214   ls->decpoint = getlocaledecpoint();
    215   buffreplace(ls, old, ls->decpoint);  /* try new decimal separator */
    216   if (!buff2d(ls->buff, &seminfo->r)) {
    217     /* format error with correct decimal point: no more options */
    218     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
    219     lexerror(ls, "malformed number", TK_NUMBER);
    220   }
    221 }
    222 
    223 
    224 /* LUA_NUMBER */
    225 /*
    226 ** this function is quite liberal in what it accepts, as 'luaO_str2d'
    227 ** will reject ill-formed numerals.
    228 */
    229 static void read_numeral (LexState *ls, SemInfo *seminfo) {
    230   const char *expo = "Ee";
    231   int first = ls->current;
    232   lua_assert(lisdigit(ls->current));
    233   save_and_next(ls);
    234   if (first == '0' && check_next(ls, "Xx"))  /* hexadecimal? */
    235     expo = "Pp";
    236   for (;;) {
    237     if (check_next(ls, expo))  /* exponent part? */
    238       check_next(ls, "+-");  /* optional exponent sign */
    239     if (lisxdigit(ls->current) || ls->current == '.')
    240       save_and_next(ls);
    241     else  break;
    242   }
    243   save(ls, '\0');
    244   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
    245   if (!buff2d(ls->buff, &seminfo->r))  /* format error? */
    246     trydecpoint(ls, seminfo); /* try to update decimal point separator */
    247 }
    248 
    249 
    250 /*
    251 ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
    252 ** -1 if sequence is malformed
    253 */
    254 static int skip_sep (LexState *ls) {
    255   int count = 0;
    256   int s = ls->current;
    257   lua_assert(s == '[' || s == ']');
    258   save_and_next(ls);
    259   while (ls->current == '=') {
    260     save_and_next(ls);
    261     count++;
    262   }
    263   return (ls->current == s) ? count : (-count) - 1;
    264 }
    265 
    266 
    267 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
    268   save_and_next(ls);  /* skip 2nd `[' */
    269   if (currIsNewline(ls))  /* string starts with a newline? */
    270     inclinenumber(ls);  /* skip it */
    271   for (;;) {
    272     switch (ls->current) {
    273       case EOZ:
    274         lexerror(ls, (seminfo) ? "unfinished long string" :
    275                                  "unfinished long comment", TK_EOS);
    276         break;  /* to avoid warnings */
    277       case ']': {
    278         if (skip_sep(ls) == sep) {
    279           save_and_next(ls);  /* skip 2nd `]' */
    280           goto endloop;
    281         }
    282         break;
    283       }
    284       case '\n': case '\r': {
    285         save(ls, '\n');
    286         inclinenumber(ls);
    287         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    288         break;
    289       }
    290       default: {
    291         if (seminfo) save_and_next(ls);
    292         else next(ls);
    293       }
    294     }
    295   } endloop:
    296   if (seminfo)
    297     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
    298                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
    299 }
    300 
    301 
    302 static void escerror (LexState *ls, int *c, int n, const char *msg) {
    303   int i;
    304   luaZ_resetbuffer(ls->buff);  /* prepare error message */
    305   save(ls, '\\');
    306   for (i = 0; i < n && c[i] != EOZ; i++)
    307     save(ls, c[i]);
    308   lexerror(ls, msg, TK_STRING);
    309 }
    310 
    311 
    312 static int readhexaesc (LexState *ls) {
    313   int c[3], i;  /* keep input for error message */
    314   int r = 0;  /* result accumulator */
    315   c[0] = 'x';  /* for error message */
    316   for (i = 1; i < 3; i++) {  /* read two hexadecimal digits */
    317     c[i] = next(ls);
    318     if (!lisxdigit(c[i]))
    319       escerror(ls, c, i + 1, "hexadecimal digit expected");
    320     r = (r << 4) + luaO_hexavalue(c[i]);
    321   }
    322   return r;
    323 }
    324 
    325 
    326 static int readdecesc (LexState *ls) {
    327   int c[3], i;
    328   int r = 0;  /* result accumulator */
    329   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
    330     c[i] = ls->current;
    331     r = 10*r + c[i] - '0';
    332     next(ls);
    333   }
    334   if (r > UCHAR_MAX)
    335     escerror(ls, c, i, "decimal escape too large");
    336   return r;
    337 }
    338 
    339 
/*
** Read a short literal string delimited by 'del' (the current character
** on entry) and store its contents, without the delimiters, in
** 'seminfo->ts'. Escape sequences are translated as they are read.
** End of input or an unescaped line break before the closing delimiter
** raises "unfinished string".
**
** The escape handling ends in three chained labels:
**   read_save: advance past the escape's last character, then fall into
**   only_save: save the translated character 'c', then fall into
**   no_save:   leave the escape case without saving anything.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep delimiter (for error messages) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        lexerror(ls, "unfinished string", TK_EOS);
        break;  /* to avoid warnings */
      case '\n':
      case '\r':
        lexerror(ls, "unfinished string", TK_STRING);
        break;  /* to avoid warnings */
      case '\\': {  /* escape sequences */
        int c;  /* final character to be saved */
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; goto read_save;
          case 'b': c = '\b'; goto read_save;
          case 'f': c = '\f'; goto read_save;
          case 'n': c = '\n'; goto read_save;
          case 'r': c = '\r'; goto read_save;
          case 't': c = '\t'; goto read_save;
          case 'v': c = '\v'; goto read_save;
          /* 'readhexaesc' stops on the second digit, so one more
             character must still be consumed */
          case 'x': c = readhexaesc(ls); goto read_save;
          /* escaped line break: the newline sequence is already
             consumed by 'inclinenumber' and is saved as '\n' */
          case '\n': case '\r':
            inclinenumber(ls); c = '\n'; goto only_save;
          case '\\': case '\"': case '\'':
            c = ls->current; goto read_save;
          case EOZ: goto no_save;  /* will raise an error next loop */
          case 'z': {  /* zap following span of spaces */
            next(ls);  /* skip the 'z' */
            while (lisspace(ls->current)) {
              if (currIsNewline(ls)) inclinenumber(ls);
              else next(ls);
            }
            goto no_save;
          }
          default: {
            if (!lisdigit(ls->current))
              escerror(ls, &ls->current, 1, "invalid escape sequence");
            /* digital escape \ddd */
            /* 'readdecesc' already consumed all the digits */
            c = readdecesc(ls);
            goto only_save;
          }
        }
       read_save: next(ls);  /* read next character */
       only_save: save(ls, c);  /* save 'c' */
       no_save: break;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* the buffer holds both delimiters: drop the first and last byte */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
    396 
    397 
/*
** Scan and return the next token from the input. Single-character tokens
** are returned as their own character code; multi-character ones as TK_*
** codes. For names and reserved words, strings and numerals the
** associated value is stored through 'seminfo'. The token buffer is
** reset on entry. White space, line breaks and comments do not produce
** a token: their cases 'break' out of the switch and the enclosing loop
** scans again.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n': case '\r': {  /* line breaks */
        inclinenumber(ls);
        break;
      }
      case ' ': case '\f': case '\t': case '\v': {  /* spaces */
        next(ls);
        break;
      }
      case '-': {  /* '-' or '--' (comment) */
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {  /* long comment? */
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* skip long comment */
            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
            break;
          }
          /* a malformed '[=*' after '--' is just a short comment */
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);  /* skip until end of line (or end of file) */
        break;
      }
      case '[': {  /* long string or simply '[' */
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        else if (sep == -1) return '[';  /* lone bracket, no '='s */
        /* 'lexerror' does not return, so control never falls into
           the next case */
        else lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case ':': {
        next(ls);
        if (ls->current != ':') return ':';
        else { next(ls); return TK_DBCOLON; }
      }
      case '"': case '\'': {  /* short literal strings */
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {  /* '.', '..', '...', or number */
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* '...' */
          else return TK_CONCAT;   /* '..' */
        }
        else if (!lisdigit(ls->current)) return '.';
        /* else go through */
        /* (intentional fallthrough: a '.' followed by a digit starts a
           numeral, and the '.' is already saved in the buffer) */
      }
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9': {
        read_numeral(ls, seminfo);
        return TK_NUMBER;
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (lislalpha(ls->current)) {  /* identifier or reserved word? */
          TString *ts;
          do {
            save_and_next(ls);
          } while (lislalnum(ls->current));
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          seminfo->ts = ts;
          if (isreserved(ts))  /* reserved word? */
            /* 'extra' holds the word's table index plus one (set in
               'luaX_init'); map it back to its token code */
            return ts->tsv.extra - 1 + FIRST_RESERVED;
          else {
            return TK_NAME;
          }
        }
        else {  /* single-char tokens (+ - / ...) */
          int c = ls->current;
          next(ls);
          return c;
        }
      }
    }
  }
}
    509 
    510 
    511 void luaX_next (LexState *ls) {
    512   ls->lastline = ls->linenumber;
    513   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    514     ls->t = ls->lookahead;  /* use this one */
    515     ls->lookahead.token = TK_EOS;  /* and discharge it */
    516   }
    517   else
    518     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    519 }
    520 
    521 
    522 int luaX_lookahead (LexState *ls) {
    523   lua_assert(ls->lookahead.token == TK_EOS);
    524   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
    525   return ls->lookahead.token;
    526 }
    527 
    528