Home | History | Annotate | Download | only in Parser
      1 
      2 /* Parser-tokenizer link implementation */
      3 
      4 #include "pgenheaders.h"
      5 #include "tokenizer.h"
      6 #include "node.h"
      7 #include "grammar.h"
      8 #include "parser.h"
      9 #include "parsetok.h"
     10 #include "errcode.h"
     11 #include "graminit.h"
     12 
     13 
     14 /* Forward */
     15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
     16 static int initerr(perrdetail *err_ret, PyObject * filename);
     17 
     18 /* Parse input coming from a string.  Return error code, print some errors. */
     19 node *
     20 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
     21 {
     22     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
     23 }
     24 
     25 node *
     26 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
     27                           perrdetail *err_ret, int flags)
     28 {
     29     return PyParser_ParseStringFlagsFilename(s, NULL,
     30                                              g, start, err_ret, flags);
     31 }
     32 
     33 node *
     34 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
     35                           grammar *g, int start,
     36                           perrdetail *err_ret, int flags)
     37 {
     38     int iflags = flags;
     39     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
     40                                                err_ret, &iflags);
     41 }
     42 
     43 node *
     44 PyParser_ParseStringObject(const char *s, PyObject *filename,
     45                            grammar *g, int start,
     46                            perrdetail *err_ret, int *flags)
     47 {
     48     struct tok_state *tok;
     49     int exec_input = start == file_input;
     50 
     51     if (initerr(err_ret, filename) < 0)
     52         return NULL;
     53 
     54     if (*flags & PyPARSE_IGNORE_COOKIE)
     55         tok = PyTokenizer_FromUTF8(s, exec_input);
     56     else
     57         tok = PyTokenizer_FromString(s, exec_input);
     58     if (tok == NULL) {
     59         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
     60         return NULL;
     61     }
     62 
     63 #ifndef PGEN
     64     Py_INCREF(err_ret->filename);
     65     tok->filename = err_ret->filename;
     66 #endif
     67     return parsetok(tok, g, start, err_ret, flags);
     68 }
     69 
     70 node *
     71 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
     72                           grammar *g, int start,
     73                           perrdetail *err_ret, int *flags)
     74 {
     75     node *n;
     76     PyObject *filename = NULL;
     77 #ifndef PGEN
     78     if (filename_str != NULL) {
     79         filename = PyUnicode_DecodeFSDefault(filename_str);
     80         if (filename == NULL) {
     81             err_ret->error = E_ERROR;
     82             return NULL;
     83         }
     84     }
     85 #endif
     86     n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
     87 #ifndef PGEN
     88     Py_XDECREF(filename);
     89 #endif
     90     return n;
     91 }
     92 
     93 /* Parse input coming from a file.  Return error code, print some errors. */
     94 
     95 node *
     96 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
     97                    const char *ps1, const char *ps2,
     98                    perrdetail *err_ret)
     99 {
    100     return PyParser_ParseFileFlags(fp, filename, NULL,
    101                                    g, start, ps1, ps2, err_ret, 0);
    102 }
    103 
    104 node *
    105 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
    106                         grammar *g, int start,
    107                         const char *ps1, const char *ps2,
    108                         perrdetail *err_ret, int flags)
    109 {
    110     int iflags = flags;
    111     return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
    112                                      ps2, err_ret, &iflags);
    113 }
    114 
    115 node *
    116 PyParser_ParseFileObject(FILE *fp, PyObject *filename,
    117                          const char *enc, grammar *g, int start,
    118                          const char *ps1, const char *ps2,
    119                          perrdetail *err_ret, int *flags)
    120 {
    121     struct tok_state *tok;
    122 
    123     if (initerr(err_ret, filename) < 0)
    124         return NULL;
    125 
    126     if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
    127         err_ret->error = E_NOMEM;
    128         return NULL;
    129     }
    130 #ifndef PGEN
    131     Py_INCREF(err_ret->filename);
    132     tok->filename = err_ret->filename;
    133 #endif
    134     return parsetok(tok, g, start, err_ret, flags);
    135 }
    136 
    137 node *
    138 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
    139                           const char *enc, grammar *g, int start,
    140                           const char *ps1, const char *ps2,
    141                           perrdetail *err_ret, int *flags)
    142 {
    143     node *n;
    144     PyObject *fileobj = NULL;
    145 #ifndef PGEN
    146     if (filename != NULL) {
    147         fileobj = PyUnicode_DecodeFSDefault(filename);
    148         if (fileobj == NULL) {
    149             err_ret->error = E_ERROR;
    150             return NULL;
    151         }
    152     }
    153 #endif
    154     n = PyParser_ParseFileObject(fp, fileobj, enc, g,
    155                                  start, ps1, ps2, err_ret, flags);
    156 #ifndef PGEN
    157     Py_XDECREF(fileobj);
    158 #endif
    159     return n;
    160 }
    161 
    162 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    163 #if 0
    164 static const char with_msg[] =
    165 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
    166 
    167 static const char as_msg[] =
    168 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
    169 
    170 static void
    171 warn(const char *msg, const char *filename, int lineno)
    172 {
    173     if (filename == NULL)
    174         filename = "<string>";
    175     PySys_WriteStderr(msg, filename, lineno);
    176 }
    177 #endif
    178 #endif
    179 
    180 /* Parse input coming from the given tokenizer structure.
    181    Return error code. */
    182 
    183 static node *
    184 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
    185          int *flags)
    186 {
    187     parser_state *ps;
    188     node *n;
    189     int started = 0;
    190 
    191     if ((ps = PyParser_New(g, start)) == NULL) {
    192         err_ret->error = E_NOMEM;
    193         PyTokenizer_Free(tok);
    194         return NULL;
    195     }
    196 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    197     if (*flags & PyPARSE_BARRY_AS_BDFL)
    198         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
    199 #endif
    200 
    201     for (;;) {
    202         char *a, *b;
    203         int type;
    204         size_t len;
    205         char *str;
    206         int col_offset;
    207 
    208         type = PyTokenizer_Get(tok, &a, &b);
    209         if (type == ERRORTOKEN) {
    210             err_ret->error = tok->done;
    211             break;
    212         }
    213         if (type == ENDMARKER && started) {
    214             type = NEWLINE; /* Add an extra newline */
    215             started = 0;
    216             /* Add the right number of dedent tokens,
    217                except if a certain flag is given --
    218                codeop.py uses this. */
    219             if (tok->indent &&
    220                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
    221             {
    222                 tok->pendin = -tok->indent;
    223                 tok->indent = 0;
    224             }
    225         }
    226         else
    227             started = 1;
    228         len = (a != NULL && b != NULL) ? b - a : 0;
    229         str = (char *) PyObject_MALLOC(len + 1);
    230         if (str == NULL) {
    231             err_ret->error = E_NOMEM;
    232             break;
    233         }
    234         if (len > 0)
    235             strncpy(str, a, len);
    236         str[len] = '\0';
    237 
    238 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    239         if (type == NOTEQUAL) {
    240             if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
    241                             strcmp(str, "!=")) {
    242                 PyObject_FREE(str);
    243                 err_ret->error = E_SYNTAX;
    244                 break;
    245             }
    246             else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
    247                             strcmp(str, "<>")) {
    248                 PyObject_FREE(str);
    249                 err_ret->expected = NOTEQUAL;
    250                 err_ret->error = E_SYNTAX;
    251                 break;
    252             }
    253         }
    254 #endif
    255         if (a != NULL && a >= tok->line_start) {
    256             col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
    257                                           intptr_t, int);
    258         }
    259         else {
    260             col_offset = -1;
    261         }
    262 
    263         if ((err_ret->error =
    264              PyParser_AddToken(ps, (int)type, str,
    265                                tok->lineno, col_offset,
    266                                &(err_ret->expected))) != E_OK) {
    267             if (err_ret->error != E_DONE) {
    268                 PyObject_FREE(str);
    269                 err_ret->token = type;
    270             }
    271             break;
    272         }
    273     }
    274 
    275     if (err_ret->error == E_DONE) {
    276         n = ps->p_tree;
    277         ps->p_tree = NULL;
    278 
    279 #ifndef PGEN
    280         /* Check that the source for a single input statement really
    281            is a single statement by looking at what is left in the
    282            buffer after parsing.  Trailing whitespace and comments
    283            are OK.  */
    284         if (start == single_input) {
    285             char *cur = tok->cur;
    286             char c = *tok->cur;
    287 
    288             for (;;) {
    289                 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
    290                     c = *++cur;
    291 
    292                 if (!c)
    293                     break;
    294 
    295                 if (c != '#') {
    296                     err_ret->error = E_BADSINGLE;
    297                     PyNode_Free(n);
    298                     n = NULL;
    299                     break;
    300                 }
    301 
    302                 /* Suck up comment. */
    303                 while (c && c != '\n')
    304                     c = *++cur;
    305             }
    306         }
    307 #endif
    308     }
    309     else
    310         n = NULL;
    311 
    312 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    313     *flags = ps->p_flags;
    314 #endif
    315     PyParser_Delete(ps);
    316 
    317     if (n == NULL) {
    318         if (tok->done == E_EOF)
    319             err_ret->error = E_EOF;
    320         err_ret->lineno = tok->lineno;
    321         if (tok->buf != NULL) {
    322             size_t len;
    323             assert(tok->cur - tok->buf < INT_MAX);
    324             err_ret->offset = (int)(tok->cur - tok->buf);
    325             len = tok->inp - tok->buf;
    326             err_ret->text = (char *) PyObject_MALLOC(len + 1);
    327             if (err_ret->text != NULL) {
    328                 if (len > 0)
    329                     strncpy(err_ret->text, tok->buf, len);
    330                 err_ret->text[len] = '\0';
    331             }
    332         }
    333     } else if (tok->encoding != NULL) {
    334         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
    335          * allocated using PyMem_
    336          */
    337         node* r = PyNode_New(encoding_decl);
    338         if (r)
    339             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
    340         if (!r || !r->n_str) {
    341             err_ret->error = E_NOMEM;
    342             if (r)
    343                 PyObject_FREE(r);
    344             n = NULL;
    345             goto done;
    346         }
    347         strcpy(r->n_str, tok->encoding);
    348         PyMem_FREE(tok->encoding);
    349         tok->encoding = NULL;
    350         r->n_nchildren = 1;
    351         r->n_child = n;
    352         n = r;
    353     }
    354 
    355 done:
    356     PyTokenizer_Free(tok);
    357 
    358     return n;
    359 }
    360 
    361 static int
    362 initerr(perrdetail *err_ret, PyObject *filename)
    363 {
    364     err_ret->error = E_OK;
    365     err_ret->lineno = 0;
    366     err_ret->offset = 0;
    367     err_ret->text = NULL;
    368     err_ret->token = -1;
    369     err_ret->expected = -1;
    370 #ifndef PGEN
    371     if (filename) {
    372         Py_INCREF(filename);
    373         err_ret->filename = filename;
    374     }
    375     else {
    376         err_ret->filename = PyUnicode_FromString("<string>");
    377         if (err_ret->filename == NULL) {
    378             err_ret->error = E_ERROR;
    379             return -1;
    380         }
    381     }
    382 #endif
    383     return 0;
    384 }
    385