Home | History | Annotate | Download | only in src
      1 /*
      2 ** $Id: llex.c,v 2.63.1.2 2013/08/30 15:49:41 roberto Exp $
      3 ** Lexical Analyzer
      4 ** See Copyright Notice in lua.h
      5 */
      6 
      7 
      8 #include <locale.h>
      9 #include <string.h>
     10 
     11 #define llex_c
     12 #define LUA_CORE
     13 
     14 #include "lua.h"
     15 
     16 #include "lctype.h"
     17 #include "ldo.h"
     18 #include "llex.h"
     19 #include "lobject.h"
     20 #include "lparser.h"
     21 #include "lstate.h"
     22 #include "lstring.h"
     23 #include "ltable.h"
     24 #include "lzio.h"
     25 
     26 
     27 
     28 #define next(ls) (ls->current = zgetc(ls->z))
     29 
     30 
     31 
     32 #define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
     33 
     34 
     35 /* ORDER RESERVED */
     36 static const char *const luaX_tokens [] = {
     37     "and", "break", "do", "else", "elseif",
     38     "end", "false", "for", "function", "goto", "if",
     39     "in", "local", "nil", "not", "or", "repeat",
     40     "return", "then", "true", "until", "while",
     41     "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
     42     "<number>", "<name>", "<string>"
     43 };
     44 
     45 
     46 #define save_and_next(ls) (save(ls, ls->current), next(ls))
     47 
     48 
     49 static l_noret lexerror (LexState *ls, const char *msg, int token);
     50 
     51 
     52 static void save (LexState *ls, int c) {
     53   Mbuffer *b = ls->buff;
     54   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
     55     size_t newsize;
     56     if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
     57       lexerror(ls, "lexical element too long", 0);
     58     newsize = luaZ_sizebuffer(b) * 2;
     59     luaZ_resizebuffer(ls->L, b, newsize);
     60   }
     61   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
     62 }
     63 
     64 
     65 void luaX_init (lua_State *L) {
     66   int i;
     67   for (i=0; i<NUM_RESERVED; i++) {
     68     TString *ts = luaS_new(L, luaX_tokens[i]);
     69     luaS_fix(ts);  /* reserved words are never collected */
     70     ts->tsv.extra = cast_byte(i+1);  /* reserved word */
     71   }
     72 }
     73 
     74 
     75 const char *luaX_token2str (LexState *ls, int token) {
     76   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
     77     lua_assert(token == cast(unsigned char, token));
     78     return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
     79                               luaO_pushfstring(ls->L, "char(%d)", token);
     80   }
     81   else {
     82     const char *s = luaX_tokens[token - FIRST_RESERVED];
     83     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
     84       return luaO_pushfstring(ls->L, LUA_QS, s);
     85     else  /* names, strings, and numerals */
     86       return s;
     87   }
     88 }
     89 
     90 
     91 static const char *txtToken (LexState *ls, int token) {
     92   switch (token) {
     93     case TK_NAME:
     94     case TK_STRING:
     95     case TK_NUMBER:
     96       save(ls, '\0');
     97       return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
     98     default:
     99       return luaX_token2str(ls, token);
    100   }
    101 }
    102 
    103 
    104 static l_noret lexerror (LexState *ls, const char *msg, int token) {
    105   char buff[LUA_IDSIZE];
    106   luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
    107   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
    108   if (token)
    109     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
    110   luaD_throw(ls->L, LUA_ERRSYNTAX);
    111 }
    112 
    113 
    114 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
    115   lexerror(ls, msg, ls->t.token);
    116 }
    117 
    118 
    119 /*
    120 ** creates a new string and anchors it in function's table so that
    121 ** it will not be collected until the end of the function's compilation
    122 ** (by that time it should be anchored in function's prototype)
    123 */
    124 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
    125   lua_State *L = ls->L;
    126   TValue *o;  /* entry for `str' */
    127   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
    128   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
    129   o = luaH_set(L, ls->fs->h, L->top - 1);
    130   if (ttisnil(o)) {  /* not in use yet? (see 'addK') */
    131     /* boolean value does not need GC barrier;
    132        table has no metatable, so it does not need to invalidate cache */
    133     setbvalue(o, 1);  /* t[string] = true */
    134     luaC_checkGC(L);
    135   }
    136   else {  /* string already present */
    137     ts = rawtsvalue(keyfromval(o));  /* re-use value previously stored */
    138   }
    139   L->top--;  /* remove string from stack */
    140   return ts;
    141 }
    142 
    143 
    144 /*
    145 ** increment line number and skips newline sequence (any of
    146 ** \n, \r, \n\r, or \r\n)
    147 */
    148 static void inclinenumber (LexState *ls) {
    149   int old = ls->current;
    150   lua_assert(currIsNewline(ls));
    151   next(ls);  /* skip `\n' or `\r' */
    152   if (currIsNewline(ls) && ls->current != old)
    153     next(ls);  /* skip `\n\r' or `\r\n' */
    154   if (++ls->linenumber >= MAX_INT)
    155     luaX_syntaxerror(ls, "chunk has too many lines");
    156 }
    157 
    158 
    159 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
    160                     int firstchar) {
    161   ls->decpoint = '.';
    162   ls->L = L;
    163   ls->current = firstchar;
    164   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
    165   ls->z = z;
    166   ls->fs = NULL;
    167   ls->linenumber = 1;
    168   ls->lastline = 1;
    169   ls->source = source;
    170   ls->envn = luaS_new(L, LUA_ENV);  /* create env name */
    171   luaS_fix(ls->envn);  /* never collect this name */
    172   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
    173 }
    174 
    175 
    176 
    177 /*
    178 ** =======================================================
    179 ** LEXICAL ANALYZER
    180 ** =======================================================
    181 */
    182 
    183 
    184 
    185 static int check_next (LexState *ls, const char *set) {
    186   if (ls->current == '\0' || !strchr(set, ls->current))
    187     return 0;
    188   save_and_next(ls);
    189   return 1;
    190 }
    191 
    192 
    193 /*
    194 ** change all characters 'from' in buffer to 'to'
    195 */
    196 static void buffreplace (LexState *ls, char from, char to) {
    197   size_t n = luaZ_bufflen(ls->buff);
    198   char *p = luaZ_buffer(ls->buff);
    199   while (n--)
    200     if (p[n] == from) p[n] = to;
    201 }
    202 
    203 
    204 #if !defined(getlocaledecpoint)
    205 #define getlocaledecpoint()	(localeconv()->decimal_point[0])
    206 #endif
    207 
    208 
    209 #define buff2d(b,e)	luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
    210 
    211 /*
    212 ** in case of format error, try to change decimal point separator to
    213 ** the one defined in the current locale and check again
    214 */
    215 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
    216   char old = ls->decpoint;
    217   ls->decpoint = getlocaledecpoint();
    218   buffreplace(ls, old, ls->decpoint);  /* try new decimal separator */
    219   if (!buff2d(ls->buff, &seminfo->r)) {
    220     /* format error with correct decimal point: no more options */
    221     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
    222     lexerror(ls, "malformed number", TK_NUMBER);
    223   }
    224 }
    225 
    226 
    227 /* LUA_NUMBER */
    228 /*
    229 ** this function is quite liberal in what it accepts, as 'luaO_str2d'
    230 ** will reject ill-formed numerals.
    231 */
    232 static void read_numeral (LexState *ls, SemInfo *seminfo) {
    233   const char *expo = "Ee";
    234   int first = ls->current;
    235   lua_assert(lisdigit(ls->current));
    236   save_and_next(ls);
    237   if (first == '0' && check_next(ls, "Xx"))  /* hexadecimal? */
    238     expo = "Pp";
    239   for (;;) {
    240     if (check_next(ls, expo))  /* exponent part? */
    241       check_next(ls, "+-");  /* optional exponent sign */
    242     if (lisxdigit(ls->current) || ls->current == '.')
    243       save_and_next(ls);
    244     else  break;
    245   }
    246   save(ls, '\0');
    247   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
    248   if (!buff2d(ls->buff, &seminfo->r))  /* format error? */
    249     trydecpoint(ls, seminfo); /* try to update decimal point separator */
    250 }
    251 
    252 
    253 /*
    254 ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
    255 ** -1 if sequence is malformed
    256 */
    257 static int skip_sep (LexState *ls) {
    258   int count = 0;
    259   int s = ls->current;
    260   lua_assert(s == '[' || s == ']');
    261   save_and_next(ls);
    262   while (ls->current == '=') {
    263     save_and_next(ls);
    264     count++;
    265   }
    266   return (ls->current == s) ? count : (-count) - 1;
    267 }
    268 
    269 
    270 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
    271   save_and_next(ls);  /* skip 2nd `[' */
    272   if (currIsNewline(ls))  /* string starts with a newline? */
    273     inclinenumber(ls);  /* skip it */
    274   for (;;) {
    275     switch (ls->current) {
    276       case EOZ:
    277         lexerror(ls, (seminfo) ? "unfinished long string" :
    278                                  "unfinished long comment", TK_EOS);
    279         break;  /* to avoid warnings */
    280       case ']': {
    281         if (skip_sep(ls) == sep) {
    282           save_and_next(ls);  /* skip 2nd `]' */
    283           goto endloop;
    284         }
    285         break;
    286       }
    287       case '\n': case '\r': {
    288         save(ls, '\n');
    289         inclinenumber(ls);
    290         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    291         break;
    292       }
    293       default: {
    294         if (seminfo) save_and_next(ls);
    295         else next(ls);
    296       }
    297     }
    298   } endloop:
    299   if (seminfo)
    300     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
    301                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
    302 }
    303 
    304 
    305 static void escerror (LexState *ls, int *c, int n, const char *msg) {
    306   int i;
    307   luaZ_resetbuffer(ls->buff);  /* prepare error message */
    308   save(ls, '\\');
    309   for (i = 0; i < n && c[i] != EOZ; i++)
    310     save(ls, c[i]);
    311   lexerror(ls, msg, TK_STRING);
    312 }
    313 
    314 
    315 static int readhexaesc (LexState *ls) {
    316   int c[3], i;  /* keep input for error message */
    317   int r = 0;  /* result accumulator */
    318   c[0] = 'x';  /* for error message */
    319   for (i = 1; i < 3; i++) {  /* read two hexadecimal digits */
    320     c[i] = next(ls);
    321     if (!lisxdigit(c[i]))
    322       escerror(ls, c, i + 1, "hexadecimal digit expected");
    323     r = (r << 4) + luaO_hexavalue(c[i]);
    324   }
    325   return r;
    326 }
    327 
    328 
    329 static int readdecesc (LexState *ls) {
    330   int c[3], i;
    331   int r = 0;  /* result accumulator */
    332   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
    333     c[i] = ls->current;
    334     r = 10*r + c[i] - '0';
    335     next(ls);
    336   }
    337   if (r > UCHAR_MAX)
    338     escerror(ls, c, i, "decimal escape too large");
    339   return r;
    340 }
    341 
    342 
    343 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
    344   save_and_next(ls);  /* keep delimiter (for error messages) */
    345   while (ls->current != del) {
    346     switch (ls->current) {
    347       case EOZ:
    348         lexerror(ls, "unfinished string", TK_EOS);
    349         break;  /* to avoid warnings */
    350       case '\n':
    351       case '\r':
    352         lexerror(ls, "unfinished string", TK_STRING);
    353         break;  /* to avoid warnings */
    354       case '\\': {  /* escape sequences */
    355         int c;  /* final character to be saved */
    356         next(ls);  /* do not save the `\' */
    357         switch (ls->current) {
    358           case 'a': c = '\a'; goto read_save;
    359           case 'b': c = '\b'; goto read_save;
    360           case 'f': c = '\f'; goto read_save;
    361           case 'n': c = '\n'; goto read_save;
    362           case 'r': c = '\r'; goto read_save;
    363           case 't': c = '\t'; goto read_save;
    364           case 'v': c = '\v'; goto read_save;
    365           case 'x': c = readhexaesc(ls); goto read_save;
    366           case '\n': case '\r':
    367             inclinenumber(ls); c = '\n'; goto only_save;
    368           case '\\': case '\"': case '\'':
    369             c = ls->current; goto read_save;
    370           case EOZ: goto no_save;  /* will raise an error next loop */
    371           case 'z': {  /* zap following span of spaces */
    372             next(ls);  /* skip the 'z' */
    373             while (lisspace(ls->current)) {
    374               if (currIsNewline(ls)) inclinenumber(ls);
    375               else next(ls);
    376             }
    377             goto no_save;
    378           }
    379           default: {
    380             if (!lisdigit(ls->current))
    381               escerror(ls, &ls->current, 1, "invalid escape sequence");
    382             /* digital escape \ddd */
    383             c = readdecesc(ls);
    384             goto only_save;
    385           }
    386         }
    387        read_save: next(ls);  /* read next character */
    388        only_save: save(ls, c);  /* save 'c' */
    389        no_save: break;
    390       }
    391       default:
    392         save_and_next(ls);
    393     }
    394   }
    395   save_and_next(ls);  /* skip delimiter */
    396   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
    397                                    luaZ_bufflen(ls->buff) - 2);
    398 }
    399 
    400 
    401 static int llex (LexState *ls, SemInfo *seminfo) {
    402   luaZ_resetbuffer(ls->buff);
    403   for (;;) {
    404     switch (ls->current) {
    405       case '\n': case '\r': {  /* line breaks */
    406         inclinenumber(ls);
    407         break;
    408       }
    409       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
    410         next(ls);
    411         break;
    412       }
    413       case '-': {  /* '-' or '--' (comment) */
    414         next(ls);
    415         if (ls->current != '-') return '-';
    416         /* else is a comment */
    417         next(ls);
    418         if (ls->current == '[') {  /* long comment? */
    419           int sep = skip_sep(ls);
    420           luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
    421           if (sep >= 0) {
    422             read_long_string(ls, NULL, sep);  /* skip long comment */
    423             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
    424             break;
    425           }
    426         }
    427         /* else short comment */
    428         while (!currIsNewline(ls) && ls->current != EOZ)
    429           next(ls);  /* skip until end of line (or end of file) */
    430         break;
    431       }
    432       case '[': {  /* long string or simply '[' */
    433         int sep = skip_sep(ls);
    434         if (sep >= 0) {
    435           read_long_string(ls, seminfo, sep);
    436           return TK_STRING;
    437         }
    438         else if (sep == -1) return '[';
    439         else lexerror(ls, "invalid long string delimiter", TK_STRING);
    440       }
    441       case '=': {
    442         next(ls);
    443         if (ls->current != '=') return '=';
    444         else { next(ls); return TK_EQ; }
    445       }
    446       case '<': {
    447         next(ls);
    448         if (ls->current != '=') return '<';
    449         else { next(ls); return TK_LE; }
    450       }
    451       case '>': {
    452         next(ls);
    453         if (ls->current != '=') return '>';
    454         else { next(ls); return TK_GE; }
    455       }
    456       case '~': {
    457         next(ls);
    458         if (ls->current != '=') return '~';
    459         else { next(ls); return TK_NE; }
    460       }
    461       case ':': {
    462         next(ls);
    463         if (ls->current != ':') return ':';
    464         else { next(ls); return TK_DBCOLON; }
    465       }
    466       case '"': case '\'': {  /* short literal strings */
    467         read_string(ls, ls->current, seminfo);
    468         return TK_STRING;
    469       }
    470       case '.': {  /* '.', '..', '...', or number */
    471         save_and_next(ls);
    472         if (check_next(ls, ".")) {
    473           if (check_next(ls, "."))
    474             return TK_DOTS;   /* '...' */
    475           else return TK_CONCAT;   /* '..' */
    476         }
    477         else if (!lisdigit(ls->current)) return '.';
    478         /* else go through */
    479       }
    480       case '0': case '1': case '2': case '3': case '4':
    481       case '5': case '6': case '7': case '8': case '9': {
    482         read_numeral(ls, seminfo);
    483         return TK_NUMBER;
    484       }
    485       case EOZ: {
    486         return TK_EOS;
    487       }
    488       default: {
    489         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
    490           TString *ts;
    491           do {
    492             save_and_next(ls);
    493           } while (lislalnum(ls->current));
    494           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
    495                                   luaZ_bufflen(ls->buff));
    496           seminfo->ts = ts;
    497           if (isreserved(ts))  /* reserved word? */
    498             return ts->tsv.extra - 1 + FIRST_RESERVED;
    499           else {
    500             return TK_NAME;
    501           }
    502         }
    503         else {  /* single-char tokens (+ - / ...) */
    504           int c = ls->current;
    505           next(ls);
    506           return c;
    507         }
    508       }
    509     }
    510   }
    511 }
    512 
    513 
    514 void luaX_next (LexState *ls) {
    515   ls->lastline = ls->linenumber;
    516   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    517     ls->t = ls->lookahead;  /* use this one */
    518     ls->lookahead.token = TK_EOS;  /* and discharge it */
    519   }
    520   else
    521     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    522 }
    523 
    524 
    525 int luaX_lookahead (LexState *ls) {
    526   lua_assert(ls->lookahead.token == TK_EOS);
    527   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
    528   return ls->lookahead.token;
    529 }
    530 
    531