1 2 /* Parser-tokenizer link implementation */ 3 4 #include "pgenheaders.h" 5 #include "tokenizer.h" 6 #include "node.h" 7 #include "grammar.h" 8 #include "parser.h" 9 #include "parsetok.h" 10 #include "errcode.h" 11 #include "graminit.h" 12 13 14 /* Forward */ 15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); 16 static int initerr(perrdetail *err_ret, PyObject * filename); 17 18 /* Parse input coming from a string. Return error code, print some errors. */ 19 node * 20 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) 21 { 22 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); 23 } 24 25 node * 26 PyParser_ParseStringFlags(const char *s, grammar *g, int start, 27 perrdetail *err_ret, int flags) 28 { 29 return PyParser_ParseStringFlagsFilename(s, NULL, 30 g, start, err_ret, flags); 31 } 32 33 node * 34 PyParser_ParseStringFlagsFilename(const char *s, const char *filename, 35 grammar *g, int start, 36 perrdetail *err_ret, int flags) 37 { 38 int iflags = flags; 39 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, 40 err_ret, &iflags); 41 } 42 43 node * 44 PyParser_ParseStringObject(const char *s, PyObject *filename, 45 grammar *g, int start, 46 perrdetail *err_ret, int *flags) 47 { 48 struct tok_state *tok; 49 int exec_input = start == file_input; 50 51 if (initerr(err_ret, filename) < 0) 52 return NULL; 53 54 if (*flags & PyPARSE_IGNORE_COOKIE) 55 tok = PyTokenizer_FromUTF8(s, exec_input); 56 else 57 tok = PyTokenizer_FromString(s, exec_input); 58 if (tok == NULL) { 59 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; 60 return NULL; 61 } 62 63 #ifndef PGEN 64 Py_INCREF(err_ret->filename); 65 tok->filename = err_ret->filename; 66 #endif 67 return parsetok(tok, g, start, err_ret, flags); 68 } 69 70 node * 71 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str, 72 grammar *g, int start, 73 perrdetail *err_ret, int *flags) 74 { 75 node *n; 76 PyObject *filename = NULL; 77 #ifndef PGEN 78 if (filename_str != NULL) { 79 filename = PyUnicode_DecodeFSDefault(filename_str); 80 if (filename == NULL) { 81 err_ret->error = E_ERROR; 82 return NULL; 83 } 84 } 85 #endif 86 n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags); 87 #ifndef PGEN 88 Py_XDECREF(filename); 89 #endif 90 return n; 91 } 92 93 /* Parse input coming from a file. Return error code, print some errors. */ 94 95 node * 96 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, 97 const char *ps1, const char *ps2, 98 perrdetail *err_ret) 99 { 100 return PyParser_ParseFileFlags(fp, filename, NULL, 101 g, start, ps1, ps2, err_ret, 0); 102 } 103 104 node * 105 PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, 106 grammar *g, int start, 107 const char *ps1, const char *ps2, 108 perrdetail *err_ret, int flags) 109 { 110 int iflags = flags; 111 return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, 112 ps2, err_ret, &iflags); 113 } 114 115 node * 116 PyParser_ParseFileObject(FILE *fp, PyObject *filename, 117 const char *enc, grammar *g, int start, 118 const char *ps1, const char *ps2, 119 perrdetail *err_ret, int *flags) 120 { 121 struct tok_state *tok; 122 123 if (initerr(err_ret, filename) < 0) 124 return NULL; 125 126 if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) { 127 err_ret->error = E_NOMEM; 128 return NULL; 129 } 130 #ifndef PGEN 131 Py_INCREF(err_ret->filename); 132 tok->filename = err_ret->filename; 133 #endif 134 return parsetok(tok, g, start, err_ret, flags); 135 } 136 137 node * 138 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, 139 const char *enc, grammar *g, int start, 140 const char *ps1, const char *ps2, 141 perrdetail *err_ret, int *flags) 142 { 143 node *n; 144 PyObject *fileobj = NULL; 145 #ifndef PGEN 146 if (filename != NULL) { 147 fileobj = PyUnicode_DecodeFSDefault(filename); 148 if (fileobj == NULL) { 149 err_ret->error = E_ERROR; 150 return NULL; 151 } 152 } 153 #endif 154 n = PyParser_ParseFileObject(fp, fileobj, enc, g, 155 start, ps1, ps2, err_ret, flags); 156 #ifndef PGEN 157 Py_XDECREF(fileobj); 158 #endif 159 return n; 160 } 161 162 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 163 #if 0 164 static const char with_msg[] = 165 "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; 166 167 static const char as_msg[] = 168 "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; 169 170 static void 171 warn(const char *msg, const char *filename, int lineno) 172 { 173 if (filename == NULL) 174 filename = "<string>"; 175 PySys_WriteStderr(msg, filename, lineno); 176 } 177 #endif 178 #endif 179 180 /* Parse input coming from the given tokenizer structure. 181 Return error code. */ 182 183 static node * 184 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 185 int *flags) 186 { 187 parser_state *ps; 188 node *n; 189 int started = 0; 190 191 if ((ps = PyParser_New(g, start)) == NULL) { 192 err_ret->error = E_NOMEM; 193 PyTokenizer_Free(tok); 194 return NULL; 195 } 196 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 197 if (*flags & PyPARSE_BARRY_AS_BDFL) 198 ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL; 199 #endif 200 201 for (;;) { 202 char *a, *b; 203 int type; 204 size_t len; 205 char *str; 206 int col_offset; 207 208 type = PyTokenizer_Get(tok, &a, &b); 209 if (type == ERRORTOKEN) { 210 err_ret->error = tok->done; 211 break; 212 } 213 if (type == ENDMARKER && started) { 214 type = NEWLINE; /* Add an extra newline */ 215 started = 0; 216 /* Add the right number of dedent tokens, 217 except if a certain flag is given -- 218 codeop.py uses this. */ 219 if (tok->indent && 220 !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) 221 { 222 tok->pendin = -tok->indent; 223 tok->indent = 0; 224 } 225 } 226 else 227 started = 1; 228 len = (a != NULL && b != NULL) ? b - a : 0; 229 str = (char *) PyObject_MALLOC(len + 1); 230 if (str == NULL) { 231 err_ret->error = E_NOMEM; 232 break; 233 } 234 if (len > 0) 235 strncpy(str, a, len); 236 str[len] = '\0'; 237 238 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 239 if (type == NOTEQUAL) { 240 if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && 241 strcmp(str, "!=")) { 242 PyObject_FREE(str); 243 err_ret->error = E_SYNTAX; 244 break; 245 } 246 else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && 247 strcmp(str, "<>")) { 248 PyObject_FREE(str); 249 err_ret->expected = NOTEQUAL; 250 err_ret->error = E_SYNTAX; 251 break; 252 } 253 } 254 #endif 255 if (a != NULL && a >= tok->line_start) { 256 col_offset = Py_SAFE_DOWNCAST(a - tok->line_start, 257 intptr_t, int); 258 } 259 else { 260 col_offset = -1; 261 } 262 263 if ((err_ret->error = 264 PyParser_AddToken(ps, (int)type, str, 265 tok->lineno, col_offset, 266 &(err_ret->expected))) != E_OK) { 267 if (err_ret->error != E_DONE) { 268 PyObject_FREE(str); 269 err_ret->token = type; 270 } 271 break; 272 } 273 } 274 275 if (err_ret->error == E_DONE) { 276 n = ps->p_tree; 277 ps->p_tree = NULL; 278 279 #ifndef PGEN 280 /* Check that the source for a single input statement really 281 is a single statement by looking at what is left in the 282 buffer after parsing. Trailing whitespace and comments 283 are OK. */ 284 if (start == single_input) { 285 char *cur = tok->cur; 286 char c = *tok->cur; 287 288 for (;;) { 289 while (c == ' ' || c == '\t' || c == '\n' || c == '\014') 290 c = *++cur; 291 292 if (!c) 293 break; 294 295 if (c != '#') { 296 err_ret->error = E_BADSINGLE; 297 PyNode_Free(n); 298 n = NULL; 299 break; 300 } 301 302 /* Suck up comment. */ 303 while (c && c != '\n') 304 c = *++cur; 305 } 306 } 307 #endif 308 } 309 else 310 n = NULL; 311 312 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 313 *flags = ps->p_flags; 314 #endif 315 PyParser_Delete(ps); 316 317 if (n == NULL) { 318 if (tok->done == E_EOF) 319 err_ret->error = E_EOF; 320 err_ret->lineno = tok->lineno; 321 if (tok->buf != NULL) { 322 size_t len; 323 assert(tok->cur - tok->buf < INT_MAX); 324 err_ret->offset = (int)(tok->cur - tok->buf); 325 len = tok->inp - tok->buf; 326 err_ret->text = (char *) PyObject_MALLOC(len + 1); 327 if (err_ret->text != NULL) { 328 if (len > 0) 329 strncpy(err_ret->text, tok->buf, len); 330 err_ret->text[len] = '\0'; 331 } 332 } 333 } else if (tok->encoding != NULL) { 334 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was 335 * allocated using PyMem_ 336 */ 337 node* r = PyNode_New(encoding_decl); 338 if (r) 339 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); 340 if (!r || !r->n_str) { 341 err_ret->error = E_NOMEM; 342 if (r) 343 PyObject_FREE(r); 344 n = NULL; 345 goto done; 346 } 347 strcpy(r->n_str, tok->encoding); 348 PyMem_FREE(tok->encoding); 349 tok->encoding = NULL; 350 r->n_nchildren = 1; 351 r->n_child = n; 352 n = r; 353 } 354 355 done: 356 PyTokenizer_Free(tok); 357 358 return n; 359 } 360 361 static int 362 initerr(perrdetail *err_ret, PyObject *filename) 363 { 364 err_ret->error = E_OK; 365 err_ret->lineno = 0; 366 err_ret->offset = 0; 367 err_ret->text = NULL; 368 err_ret->token = -1; 369 err_ret->expected = -1; 370 #ifndef PGEN 371 if (filename) { 372 Py_INCREF(filename); 373 err_ret->filename = filename; 374 } 375 else { 376 err_ret->filename = PyUnicode_FromString("<string>"); 377 if (err_ret->filename == NULL) { 378 err_ret->error = E_ERROR; 379 return -1; 380 } 381 } 382 #endif 383 return 0; 384 } 385