/* Parser-tokenizer link implementation */

#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"

/* Tab-consistency checking level: 0 = off, nonzero = warn about
   inconsistent tab/space usage, >= 2 = treat it as an error
   (see the alterror bump below). */
int Py_TabcheckFlag;


/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename);

/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
    /* Convenience wrapper: no filename, no flags. */
    return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}

node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
                          perrdetail *err_ret, int flags)
{
    /* Convenience wrapper: no filename. */
    return PyParser_ParseStringFlagsFilename(s, NULL,
                                             g, start, err_ret, flags);
}

node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
                                  grammar *g, int start,
                                  perrdetail *err_ret, int flags)
{
    /* Adapt the by-value flags word to the Ex variant's in/out pointer. */
    int iflags = flags;
    return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
                                               err_ret, &iflags);
}

/* Full-control string entry point: sets up a tokenizer over 's' and hands
   it to parsetok() (which takes ownership of the tokenizer).  'flags' is
   in/out: parsetok() may write back future-feature flags through it. */
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                                    grammar *g, int start,
                                    perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;

    initerr(err_ret, filename);

    /* NOTE(review): the second argument presumably enables the
       whole-file decoding/BOM handling only when parsing a full module
       (start == file_input) -- confirm against tokenizer.c. */
    if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
        /* A pending exception signals a decoding failure; otherwise the
           tokenizer ran out of memory. */
        err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
        return NULL;
    }

    tok->filename = filename ? filename : "<string>";
    if (Py_TabcheckFlag || Py_VerboseFlag) {
        /* tok->filename was just forced non-NULL above, so this always
           enables the warning for string input. */
        tok->altwarning = (tok->filename != NULL);
        if (Py_TabcheckFlag >= 2)
            tok->alterror++;
    }

    return parsetok(tok, g, start, err_ret, flags);
}

/* Parse input coming from a file. Return error code, print some errors.
*/ 70 71 node * 72 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, 73 char *ps1, char *ps2, perrdetail *err_ret) 74 { 75 return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, 76 err_ret, 0); 77 } 78 79 node * 80 PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, 81 char *ps1, char *ps2, perrdetail *err_ret, int flags) 82 { 83 int iflags = flags; 84 return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); 85 } 86 87 node * 88 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, 89 char *ps1, char *ps2, perrdetail *err_ret, int *flags) 90 { 91 struct tok_state *tok; 92 93 initerr(err_ret, filename); 94 95 if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { 96 err_ret->error = E_NOMEM; 97 return NULL; 98 } 99 tok->filename = filename; 100 if (Py_TabcheckFlag || Py_VerboseFlag) { 101 tok->altwarning = (filename != NULL); 102 if (Py_TabcheckFlag >= 2) 103 tok->alterror++; 104 } 105 106 return parsetok(tok, g, start, err_ret, flags); 107 } 108 109 #if 0 110 static char with_msg[] = 111 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; 112 113 static char as_msg[] = 114 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; 115 116 static void 117 warn(const char *msg, const char *filename, int lineno) 118 { 119 if (filename == NULL) 120 filename = "<string>"; 121 PySys_WriteStderr(msg, filename, lineno); 122 } 123 #endif 124 125 /* Parse input coming from the given tokenizer structure. 126 Return error code. 
*/ 127 128 static node * 129 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 130 int *flags) 131 { 132 parser_state *ps; 133 node *n; 134 int started = 0; 135 136 if ((ps = PyParser_New(g, start)) == NULL) { 137 fprintf(stderr, "no mem for new parser\n"); 138 err_ret->error = E_NOMEM; 139 PyTokenizer_Free(tok); 140 return NULL; 141 } 142 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 143 if (*flags & PyPARSE_PRINT_IS_FUNCTION) { 144 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 145 } 146 if (*flags & PyPARSE_UNICODE_LITERALS) { 147 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 148 } 149 150 #endif 151 152 for (;;) { 153 char *a, *b; 154 int type; 155 size_t len; 156 char *str; 157 int col_offset; 158 159 type = PyTokenizer_Get(tok, &a, &b); 160 if (type == ERRORTOKEN) { 161 err_ret->error = tok->done; 162 break; 163 } 164 if (type == ENDMARKER && started) { 165 type = NEWLINE; /* Add an extra newline */ 166 started = 0; 167 /* Add the right number of dedent tokens, 168 except if a certain flag is given -- 169 codeop.py uses this. 
*/ 170 if (tok->indent && 171 !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) 172 { 173 tok->pendin = -tok->indent; 174 tok->indent = 0; 175 } 176 } 177 else 178 started = 1; 179 len = b - a; /* XXX this may compute NULL - NULL */ 180 str = (char *) PyObject_MALLOC(len + 1); 181 if (str == NULL) { 182 fprintf(stderr, "no mem for next token\n"); 183 err_ret->error = E_NOMEM; 184 break; 185 } 186 if (len > 0) 187 strncpy(str, a, len); 188 str[len] = '\0'; 189 190 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 191 #endif 192 if (a >= tok->line_start) 193 col_offset = a - tok->line_start; 194 else 195 col_offset = -1; 196 197 if ((err_ret->error = 198 PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, 199 &(err_ret->expected))) != E_OK) { 200 if (err_ret->error != E_DONE) { 201 PyObject_FREE(str); 202 err_ret->token = type; 203 } 204 break; 205 } 206 } 207 208 if (err_ret->error == E_DONE) { 209 n = ps->p_tree; 210 ps->p_tree = NULL; 211 } 212 else 213 n = NULL; 214 215 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 216 *flags = ps->p_flags; 217 #endif 218 PyParser_Delete(ps); 219 220 if (n == NULL) { 221 if (tok->lineno <= 1 && tok->done == E_EOF) 222 err_ret->error = E_EOF; 223 err_ret->lineno = tok->lineno; 224 if (tok->buf != NULL) { 225 char *text = NULL; 226 size_t len; 227 assert(tok->cur - tok->buf < INT_MAX); 228 err_ret->offset = (int)(tok->cur - tok->buf); 229 len = tok->inp - tok->buf; 230 #ifdef Py_USING_UNICODE 231 text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); 232 233 #endif 234 if (text == NULL) { 235 text = (char *) PyObject_MALLOC(len + 1); 236 if (text != NULL) { 237 if (len > 0) 238 strncpy(text, tok->buf, len); 239 text[len] = '\0'; 240 } 241 } 242 err_ret->text = text; 243 } 244 } else if (tok->encoding != NULL) { 245 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was 246 * allocated using PyMem_ 247 */ 248 node* r = PyNode_New(encoding_decl); 249 if (r) 250 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); 251 if (!r || 
!r->n_str) { 252 err_ret->error = E_NOMEM; 253 if (r) 254 PyObject_FREE(r); 255 n = NULL; 256 goto done; 257 } 258 strcpy(r->n_str, tok->encoding); 259 PyMem_FREE(tok->encoding); 260 tok->encoding = NULL; 261 r->n_nchildren = 1; 262 r->n_child = n; 263 n = r; 264 } 265 266 done: 267 PyTokenizer_Free(tok); 268 269 return n; 270 } 271 272 static void 273 initerr(perrdetail *err_ret, const char *filename) 274 { 275 err_ret->error = E_OK; 276 err_ret->filename = filename; 277 err_ret->lineno = 0; 278 err_ret->offset = 0; 279 err_ret->text = NULL; 280 err_ret->token = -1; 281 err_ret->expected = -1; 282 } 283