Home | History | Annotate | Download | only in Parser
      1 
      2 /* Parser-tokenizer link implementation */
      3 
      4 #include "pgenheaders.h"
      5 #include "tokenizer.h"
      6 #include "node.h"
      7 #include "grammar.h"
      8 #include "parser.h"
      9 #include "parsetok.h"
     10 #include "errcode.h"
     11 #include "graminit.h"
     12 
     13 int Py_TabcheckFlag;
     14 
     15 
     16 /* Forward */
     17 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
     18 static void initerr(perrdetail *err_ret, const char* filename);
     19 
     20 /* Parse input coming from a string.  Return error code, print some errors. */
     21 node *
     22 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
     23 {
     24     return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
     25 }
     26 
     27 node *
     28 PyParser_ParseStringFlags(const char *s, grammar *g, int start,
     29                           perrdetail *err_ret, int flags)
     30 {
     31     return PyParser_ParseStringFlagsFilename(s, NULL,
     32                                              g, start, err_ret, flags);
     33 }
     34 
     35 node *
     36 PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
     37                           grammar *g, int start,
     38                           perrdetail *err_ret, int flags)
     39 {
     40     int iflags = flags;
     41     return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
     42                                                err_ret, &iflags);
     43 }
     44 
     45 node *
     46 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
     47                           grammar *g, int start,
     48                           perrdetail *err_ret, int *flags)
     49 {
     50     struct tok_state *tok;
     51 
     52     initerr(err_ret, filename);
     53 
     54     if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
     55         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
     56         return NULL;
     57     }
     58 
     59     tok->filename = filename ? filename : "<string>";
     60     if (Py_TabcheckFlag || Py_VerboseFlag) {
     61         tok->altwarning = (tok->filename != NULL);
     62         if (Py_TabcheckFlag >= 2)
     63             tok->alterror++;
     64     }
     65 
     66     return parsetok(tok, g, start, err_ret, flags);
     67 }
     68 
     69 /* Parse input coming from a file.  Return error code, print some errors. */
     70 
     71 node *
     72 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
     73                    char *ps1, char *ps2, perrdetail *err_ret)
     74 {
     75     return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
     76                                    err_ret, 0);
     77 }
     78 
     79 node *
     80 PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
     81                         char *ps1, char *ps2, perrdetail *err_ret, int flags)
     82 {
     83     int iflags = flags;
     84     return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);
     85 }
     86 
     87 node *
     88 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
     89                           char *ps1, char *ps2, perrdetail *err_ret, int *flags)
     90 {
     91     struct tok_state *tok;
     92 
     93     initerr(err_ret, filename);
     94 
     95     if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) {
     96         err_ret->error = E_NOMEM;
     97         return NULL;
     98     }
     99     tok->filename = filename;
    100     if (Py_TabcheckFlag || Py_VerboseFlag) {
    101         tok->altwarning = (filename != NULL);
    102         if (Py_TabcheckFlag >= 2)
    103             tok->alterror++;
    104     }
    105 
    106     return parsetok(tok, g, start, err_ret, flags);
    107 }
    108 
    109 #if 0
    110 static char with_msg[] =
    111 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
    112 
    113 static char as_msg[] =
    114 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
    115 
    116 static void
    117 warn(const char *msg, const char *filename, int lineno)
    118 {
    119     if (filename == NULL)
    120         filename = "<string>";
    121     PySys_WriteStderr(msg, filename, lineno);
    122 }
    123 #endif
    124 
    125 /* Parse input coming from the given tokenizer structure.
    126    Return error code. */
    127 
    128 static node *
    129 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
    130          int *flags)
    131 {
    132     parser_state *ps;
    133     node *n;
    134     int started = 0;
    135 
    136     if ((ps = PyParser_New(g, start)) == NULL) {
    137         fprintf(stderr, "no mem for new parser\n");
    138         err_ret->error = E_NOMEM;
    139         PyTokenizer_Free(tok);
    140         return NULL;
    141     }
    142 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    143     if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
    144         ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
    145     }
    146     if (*flags & PyPARSE_UNICODE_LITERALS) {
    147         ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
    148     }
    149 
    150 #endif
    151 
    152     for (;;) {
    153         char *a, *b;
    154         int type;
    155         size_t len;
    156         char *str;
    157         int col_offset;
    158 
    159         type = PyTokenizer_Get(tok, &a, &b);
    160         if (type == ERRORTOKEN) {
    161             err_ret->error = tok->done;
    162             break;
    163         }
    164         if (type == ENDMARKER && started) {
    165             type = NEWLINE; /* Add an extra newline */
    166             started = 0;
    167             /* Add the right number of dedent tokens,
    168                except if a certain flag is given --
    169                codeop.py uses this. */
    170             if (tok->indent &&
    171                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
    172             {
    173                 tok->pendin = -tok->indent;
    174                 tok->indent = 0;
    175             }
    176         }
    177         else
    178             started = 1;
    179         len = b - a; /* XXX this may compute NULL - NULL */
    180         str = (char *) PyObject_MALLOC(len + 1);
    181         if (str == NULL) {
    182             fprintf(stderr, "no mem for next token\n");
    183             err_ret->error = E_NOMEM;
    184             break;
    185         }
    186         if (len > 0)
    187             strncpy(str, a, len);
    188         str[len] = '\0';
    189 
    190 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    191 #endif
    192         if (a >= tok->line_start)
    193             col_offset = a - tok->line_start;
    194         else
    195             col_offset = -1;
    196 
    197         if ((err_ret->error =
    198              PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
    199                                &(err_ret->expected))) != E_OK) {
    200             if (err_ret->error != E_DONE) {
    201                 PyObject_FREE(str);
    202                 err_ret->token = type;
    203             }
    204             break;
    205         }
    206     }
    207 
    208     if (err_ret->error == E_DONE) {
    209         n = ps->p_tree;
    210         ps->p_tree = NULL;
    211     }
    212     else
    213         n = NULL;
    214 
    215 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    216     *flags = ps->p_flags;
    217 #endif
    218     PyParser_Delete(ps);
    219 
    220     if (n == NULL) {
    221         if (tok->lineno <= 1 && tok->done == E_EOF)
    222             err_ret->error = E_EOF;
    223         err_ret->lineno = tok->lineno;
    224         if (tok->buf != NULL) {
    225             char *text = NULL;
    226             size_t len;
    227             assert(tok->cur - tok->buf < INT_MAX);
    228             err_ret->offset = (int)(tok->cur - tok->buf);
    229             len = tok->inp - tok->buf;
    230 #ifdef Py_USING_UNICODE
    231             text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
    232 
    233 #endif
    234             if (text == NULL) {
    235                 text = (char *) PyObject_MALLOC(len + 1);
    236                 if (text != NULL) {
    237                     if (len > 0)
    238                         strncpy(text, tok->buf, len);
    239                     text[len] = '\0';
    240                 }
    241             }
    242             err_ret->text = text;
    243         }
    244     } else if (tok->encoding != NULL) {
    245         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
    246          * allocated using PyMem_
    247          */
    248         node* r = PyNode_New(encoding_decl);
    249         if (r)
    250             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
    251         if (!r || !r->n_str) {
    252             err_ret->error = E_NOMEM;
    253             if (r)
    254                 PyObject_FREE(r);
    255             n = NULL;
    256             goto done;
    257         }
    258         strcpy(r->n_str, tok->encoding);
    259         PyMem_FREE(tok->encoding);
    260         tok->encoding = NULL;
    261         r->n_nchildren = 1;
    262         r->n_child = n;
    263         n = r;
    264     }
    265 
    266 done:
    267     PyTokenizer_Free(tok);
    268 
    269     return n;
    270 }
    271 
    272 static void
    273 initerr(perrdetail *err_ret, const char *filename)
    274 {
    275     err_ret->error = E_OK;
    276     err_ret->filename = filename;
    277     err_ret->lineno = 0;
    278     err_ret->offset = 0;
    279     err_ret->text = NULL;
    280     err_ret->token = -1;
    281     err_ret->expected = -1;
    282 }
    283