Home | History | Annotate | Download | only in Parser
      1 
      2 /* Parser-tokenizer link implementation */
      3 
      4 #include "pgenheaders.h"
      5 #include "tokenizer.h"
      6 #include "node.h"
      7 #include "grammar.h"
      8 #include "parser.h"
      9 #include "parsetok.h"
     10 #include "errcode.h"
     11 #include "graminit.h"
     12 
     13 
     14 /* Forward */
     15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
     16 static int initerr(perrdetail *err_ret, PyObject * filename);
     17 
     18 /* Parse input coming from a string.  Return error code, print some errors. */
     19 node *
     20 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
     21 {
     22     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
     23 }
     24 
     25 node *
     26 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
     27                           perrdetail *err_ret, int flags)
     28 {
     29     return PyParser_ParseStringFlagsFilename(s, NULL,
     30                                              g, start, err_ret, flags);
     31 }
     32 
     33 node *
     34 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
     35                           grammar *g, int start,
     36                           perrdetail *err_ret, int flags)
     37 {
     38     int iflags = flags;
     39     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
     40                                                err_ret, &iflags);
     41 }
     42 
     43 node *
     44 PyParser_ParseStringObject(const char *s, PyObject *filename,
     45                            grammar *g, int start,
     46                            perrdetail *err_ret, int *flags)
     47 {
     48     struct tok_state *tok;
     49     int exec_input = start == file_input;
     50 
     51     if (initerr(err_ret, filename) < 0)
     52         return NULL;
     53 
     54     if (*flags & PyPARSE_IGNORE_COOKIE)
     55         tok = PyTokenizer_FromUTF8(s, exec_input);
     56     else
     57         tok = PyTokenizer_FromString(s, exec_input);
     58     if (tok == NULL) {
     59         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
     60         return NULL;
     61     }
     62 
     63 #ifndef PGEN
     64     Py_INCREF(err_ret->filename);
     65     tok->filename = err_ret->filename;
     66 #endif
     67     return parsetok(tok, g, start, err_ret, flags);
     68 }
     69 
     70 node *
     71 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
     72                           grammar *g, int start,
     73                           perrdetail *err_ret, int *flags)
     74 {
     75     node *n;
     76     PyObject *filename = NULL;
     77 #ifndef PGEN
     78     if (filename_str != NULL) {
     79         filename = PyUnicode_DecodeFSDefault(filename_str);
     80         if (filename == NULL) {
     81             err_ret->error = E_ERROR;
     82             return NULL;
     83         }
     84     }
     85 #endif
     86     n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
     87 #ifndef PGEN
     88     Py_XDECREF(filename);
     89 #endif
     90     return n;
     91 }
     92 
     93 /* Parse input coming from a file.  Return error code, print some errors. */
     94 
     95 node *
     96 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
     97                    const char *ps1, const char *ps2,
     98                    perrdetail *err_ret)
     99 {
    100     return PyParser_ParseFileFlags(fp, filename, NULL,
    101                                    g, start, ps1, ps2, err_ret, 0);
    102 }
    103 
    104 node *
    105 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
    106                         grammar *g, int start,
    107                         const char *ps1, const char *ps2,
    108                         perrdetail *err_ret, int flags)
    109 {
    110     int iflags = flags;
    111     return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
    112                                      ps2, err_ret, &iflags);
    113 }
    114 
    115 node *
    116 PyParser_ParseFileObject(FILE *fp, PyObject *filename,
    117                          const char *enc, grammar *g, int start,
    118                          const char *ps1, const char *ps2,
    119                          perrdetail *err_ret, int *flags)
    120 {
    121     struct tok_state *tok;
    122 
    123     if (initerr(err_ret, filename) < 0)
    124         return NULL;
    125 
    126     if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
    127         err_ret->error = E_NOMEM;
    128         return NULL;
    129     }
    130 #ifndef PGEN
    131     Py_INCREF(err_ret->filename);
    132     tok->filename = err_ret->filename;
    133 #endif
    134     return parsetok(tok, g, start, err_ret, flags);
    135 }
    136 
    137 node *
    138 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
    139                           const char *enc, grammar *g, int start,
    140                           const char *ps1, const char *ps2,
    141                           perrdetail *err_ret, int *flags)
    142 {
    143     node *n;
    144     PyObject *fileobj = NULL;
    145 #ifndef PGEN
    146     if (filename != NULL) {
    147         fileobj = PyUnicode_DecodeFSDefault(filename);
    148         if (fileobj == NULL) {
    149             err_ret->error = E_ERROR;
    150             return NULL;
    151         }
    152     }
    153 #endif
    154     n = PyParser_ParseFileObject(fp, fileobj, enc, g,
    155                                  start, ps1, ps2, err_ret, flags);
    156 #ifndef PGEN
    157     Py_XDECREF(fileobj);
    158 #endif
    159     return n;
    160 }
    161 
    162 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    163 #if 0
    164 static const char with_msg[] =
    165 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
    166 
    167 static const char as_msg[] =
    168 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
    169 
    170 static void
    171 warn(const char *msg, const char *filename, int lineno)
    172 {
    173     if (filename == NULL)
    174         filename = "<string>";
    175     PySys_WriteStderr(msg, filename, lineno);
    176 }
    177 #endif
    178 #endif
    179 
    180 /* Parse input coming from the given tokenizer structure.
    181    Return error code. */
    182 
    183 static node *
    184 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
    185          int *flags)
    186 {
    187     parser_state *ps;
    188     node *n;
    189     int started = 0;
    190 
    191     if ((ps = PyParser_New(g, start)) == NULL) {
    192         err_ret->error = E_NOMEM;
    193         PyTokenizer_Free(tok);
    194         return NULL;
    195     }
    196 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    197     if (*flags & PyPARSE_BARRY_AS_BDFL)
    198         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
    199 #endif
    200 
    201     for (;;) {
    202         char *a, *b;
    203         int type;
    204         size_t len;
    205         char *str;
    206         int col_offset;
    207 
    208         type = PyTokenizer_Get(tok, &a, &b);
    209         if (type == ERRORTOKEN) {
    210             err_ret->error = tok->done;
    211             break;
    212         }
    213         if (type == ENDMARKER && started) {
    214             type = NEWLINE; /* Add an extra newline */
    215             started = 0;
    216             /* Add the right number of dedent tokens,
    217                except if a certain flag is given --
    218                codeop.py uses this. */
    219             if (tok->indent &&
    220                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
    221             {
    222                 tok->pendin = -tok->indent;
    223                 tok->indent = 0;
    224             }
    225         }
    226         else
    227             started = 1;
    228         len = b - a; /* XXX this may compute NULL - NULL */
    229         str = (char *) PyObject_MALLOC(len + 1);
    230         if (str == NULL) {
    231             err_ret->error = E_NOMEM;
    232             break;
    233         }
    234         if (len > 0)
    235             strncpy(str, a, len);
    236         str[len] = '\0';
    237 
    238 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    239         if (type == NOTEQUAL) {
    240             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
    241                             strcmp(str, "!=")) {
    242                 PyObject_FREE(str);
    243                 err_ret->error = E_SYNTAX;
    244                 break;
    245             }
    246             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
    247                             strcmp(str, "<>")) {
    248                 PyObject_FREE(str);
    249                 err_ret->text = "with Barry as BDFL, use '<>' "
    250                                 "instead of '!='";
    251                 err_ret->error = E_SYNTAX;
    252                 break;
    253             }
    254         }
    255 #endif
    256         if (a >= tok->line_start)
    257             col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
    258                                           intptr_t, int);
    259         else
    260             col_offset = -1;
    261 
    262         if ((err_ret->error =
    263              PyParser_AddToken(ps, (int)type, str,
    264                                tok->lineno, col_offset,
    265                                &(err_ret->expected))) != E_OK) {
    266             if (err_ret->error != E_DONE) {
    267                 PyObject_FREE(str);
    268                 err_ret->token = type;
    269             }
    270             break;
    271         }
    272     }
    273 
    274     if (err_ret->error == E_DONE) {
    275         n = ps->p_tree;
    276         ps->p_tree = NULL;
    277 
    278 #ifndef PGEN
    279         /* Check that the source for a single input statement really
    280            is a single statement by looking at what is left in the
    281            buffer after parsing.  Trailing whitespace and comments
    282            are OK.  */
    283         if (start == single_input) {
    284             char *cur = tok->cur;
    285             char c = *tok->cur;
    286 
    287             for (;;) {
    288                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
    289                     c = *++cur;
    290 
    291                 if (!c)
    292                     break;
    293 
    294                 if (c != '#') {
    295                     err_ret->error = E_BADSINGLE;
    296                     PyNode_Free(n);
    297                     n = NULL;
    298                     break;
    299                 }
    300 
    301                 /* Suck up comment. */
    302                 while (c && c != '\n')
    303                     c = *++cur;
    304             }
    305         }
    306 #endif
    307     }
    308     else
    309         n = NULL;
    310 
    311 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    312     *flags = ps->p_flags;
    313 #endif
    314     PyParser_Delete(ps);
    315 
    316     if (n == NULL) {
    317         if (tok->done == E_EOF)
    318             err_ret->error = E_EOF;
    319         err_ret->lineno = tok->lineno;
    320         if (tok->buf != NULL) {
    321             size_t len;
    322             assert(tok->cur - tok->buf < INT_MAX);
    323             err_ret->offset = (int)(tok->cur - tok->buf);
    324             len = tok->inp - tok->buf;
    325             err_ret->text = (char *) PyObject_MALLOC(len + 1);
    326             if (err_ret->text != NULL) {
    327                 if (len > 0)
    328                     strncpy(err_ret->text, tok->buf, len);
    329                 err_ret->text[len] = '\0';
    330             }
    331         }
    332     } else if (tok->encoding != NULL) {
    333         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
    334          * allocated using PyMem_
    335          */
    336         node* r = PyNode_New(encoding_decl);
    337         if (r)
    338             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
    339         if (!r || !r->n_str) {
    340             err_ret->error = E_NOMEM;
    341             if (r)
    342                 PyObject_FREE(r);
    343             n = NULL;
    344             goto done;
    345         }
    346         strcpy(r->n_str, tok->encoding);
    347         PyMem_FREE(tok->encoding);
    348         tok->encoding = NULL;
    349         r->n_nchildren = 1;
    350         r->n_child = n;
    351         n = r;
    352     }
    353 
    354 done:
    355     PyTokenizer_Free(tok);
    356 
    357     return n;
    358 }
    359 
    360 static int
    361 initerr(perrdetail *err_ret, PyObject *filename)
    362 {
    363     err_ret->error = E_OK;
    364     err_ret->lineno = 0;
    365     err_ret->offset = 0;
    366     err_ret->text = NULL;
    367     err_ret->token = -1;
    368     err_ret->expected = -1;
    369 #ifndef PGEN
    370     if (filename) {
    371         Py_INCREF(filename);
    372         err_ret->filename = filename;
    373     }
    374     else {
    375         err_ret->filename = PyUnicode_FromString("<string>");
    376         if (err_ret->filename == NULL) {
    377             err_ret->error = E_ERROR;
    378             return -1;
    379         }
    380     }
    381 #endif
    382     return 0;
    383 }
    384