1 /* 2 * This file includes functions to transform a concrete syntax tree (CST) to 3 * an abstract syntax tree (AST). The main function is PyAST_FromNode(). 4 * 5 */ 6 #include "Python.h" 7 #include "Python-ast.h" 8 #include "node.h" 9 #include "ast.h" 10 #include "token.h" 11 12 #include <assert.h> 13 14 static int validate_stmts(asdl_seq *); 15 static int validate_exprs(asdl_seq *, expr_context_ty, int); 16 static int validate_nonempty_seq(asdl_seq *, const char *, const char *); 17 static int validate_stmt(stmt_ty); 18 static int validate_expr(expr_ty, expr_context_ty); 19 20 static int 21 validate_comprehension(asdl_seq *gens) 22 { 23 int i; 24 if (!asdl_seq_LEN(gens)) { 25 PyErr_SetString(PyExc_ValueError, "comprehension with no generators"); 26 return 0; 27 } 28 for (i = 0; i < asdl_seq_LEN(gens); i++) { 29 comprehension_ty comp = asdl_seq_GET(gens, i); 30 if (!validate_expr(comp->target, Store) || 31 !validate_expr(comp->iter, Load) || 32 !validate_exprs(comp->ifs, Load, 0)) 33 return 0; 34 } 35 return 1; 36 } 37 38 static int 39 validate_slice(slice_ty slice) 40 { 41 switch (slice->kind) { 42 case Slice_kind: 43 return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) && 44 (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) && 45 (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load)); 46 case ExtSlice_kind: { 47 int i; 48 if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice")) 49 return 0; 50 for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++) 51 if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i))) 52 return 0; 53 return 1; 54 } 55 case Index_kind: 56 return validate_expr(slice->v.Index.value, Load); 57 default: 58 PyErr_SetString(PyExc_SystemError, "unknown slice node"); 59 return 0; 60 } 61 } 62 63 static int 64 validate_keywords(asdl_seq *keywords) 65 { 66 int i; 67 for (i = 0; i < asdl_seq_LEN(keywords); i++) 68 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, 
i))->value, Load)) 69 return 0; 70 return 1; 71 } 72 73 static int 74 validate_args(asdl_seq *args) 75 { 76 int i; 77 for (i = 0; i < asdl_seq_LEN(args); i++) { 78 arg_ty arg = asdl_seq_GET(args, i); 79 if (arg->annotation && !validate_expr(arg->annotation, Load)) 80 return 0; 81 } 82 return 1; 83 } 84 85 static const char * 86 expr_context_name(expr_context_ty ctx) 87 { 88 switch (ctx) { 89 case Load: 90 return "Load"; 91 case Store: 92 return "Store"; 93 case Del: 94 return "Del"; 95 case AugLoad: 96 return "AugLoad"; 97 case AugStore: 98 return "AugStore"; 99 case Param: 100 return "Param"; 101 default: 102 assert(0); 103 return "(unknown)"; 104 } 105 } 106 107 static int 108 validate_arguments(arguments_ty args) 109 { 110 if (!validate_args(args->args)) 111 return 0; 112 if (args->vararg && args->vararg->annotation 113 && !validate_expr(args->vararg->annotation, Load)) { 114 return 0; 115 } 116 if (!validate_args(args->kwonlyargs)) 117 return 0; 118 if (args->kwarg && args->kwarg->annotation 119 && !validate_expr(args->kwarg->annotation, Load)) { 120 return 0; 121 } 122 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) { 123 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments"); 124 return 0; 125 } 126 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) { 127 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as " 128 "kw_defaults on arguments"); 129 return 0; 130 } 131 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1); 132 } 133 134 static int 135 validate_constant(PyObject *value) 136 { 137 if (value == Py_None || value == Py_Ellipsis) 138 return 1; 139 140 if (PyLong_CheckExact(value) 141 || PyFloat_CheckExact(value) 142 || PyComplex_CheckExact(value) 143 || PyBool_Check(value) 144 || PyUnicode_CheckExact(value) 145 || PyBytes_CheckExact(value)) 146 return 1; 147 148 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) { 149 
PyObject *it; 150 151 it = PyObject_GetIter(value); 152 if (it == NULL) 153 return 0; 154 155 while (1) { 156 PyObject *item = PyIter_Next(it); 157 if (item == NULL) { 158 if (PyErr_Occurred()) { 159 Py_DECREF(it); 160 return 0; 161 } 162 break; 163 } 164 165 if (!validate_constant(item)) { 166 Py_DECREF(it); 167 Py_DECREF(item); 168 return 0; 169 } 170 Py_DECREF(item); 171 } 172 173 Py_DECREF(it); 174 return 1; 175 } 176 177 return 0; 178 } 179 180 static int 181 validate_expr(expr_ty exp, expr_context_ty ctx) 182 { 183 int check_ctx = 1; 184 expr_context_ty actual_ctx; 185 186 /* First check expression context. */ 187 switch (exp->kind) { 188 case Attribute_kind: 189 actual_ctx = exp->v.Attribute.ctx; 190 break; 191 case Subscript_kind: 192 actual_ctx = exp->v.Subscript.ctx; 193 break; 194 case Starred_kind: 195 actual_ctx = exp->v.Starred.ctx; 196 break; 197 case Name_kind: 198 actual_ctx = exp->v.Name.ctx; 199 break; 200 case List_kind: 201 actual_ctx = exp->v.List.ctx; 202 break; 203 case Tuple_kind: 204 actual_ctx = exp->v.Tuple.ctx; 205 break; 206 default: 207 if (ctx != Load) { 208 PyErr_Format(PyExc_ValueError, "expression which can't be " 209 "assigned to in %s context", expr_context_name(ctx)); 210 return 0; 211 } 212 check_ctx = 0; 213 /* set actual_ctx to prevent gcc warning */ 214 actual_ctx = 0; 215 } 216 if (check_ctx && actual_ctx != ctx) { 217 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead", 218 expr_context_name(ctx), expr_context_name(actual_ctx)); 219 return 0; 220 } 221 222 /* Now validate expression. 
*/ 223 switch (exp->kind) { 224 case BoolOp_kind: 225 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) { 226 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values"); 227 return 0; 228 } 229 return validate_exprs(exp->v.BoolOp.values, Load, 0); 230 case BinOp_kind: 231 return validate_expr(exp->v.BinOp.left, Load) && 232 validate_expr(exp->v.BinOp.right, Load); 233 case UnaryOp_kind: 234 return validate_expr(exp->v.UnaryOp.operand, Load); 235 case Lambda_kind: 236 return validate_arguments(exp->v.Lambda.args) && 237 validate_expr(exp->v.Lambda.body, Load); 238 case IfExp_kind: 239 return validate_expr(exp->v.IfExp.test, Load) && 240 validate_expr(exp->v.IfExp.body, Load) && 241 validate_expr(exp->v.IfExp.orelse, Load); 242 case Dict_kind: 243 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) { 244 PyErr_SetString(PyExc_ValueError, 245 "Dict doesn't have the same number of keys as values"); 246 return 0; 247 } 248 /* null_ok=1 for keys expressions to allow dict unpacking to work in 249 dict literals, i.e. 
``{**{a:b}}`` */ 250 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) && 251 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0); 252 case Set_kind: 253 return validate_exprs(exp->v.Set.elts, Load, 0); 254 #define COMP(NAME) \ 255 case NAME ## _kind: \ 256 return validate_comprehension(exp->v.NAME.generators) && \ 257 validate_expr(exp->v.NAME.elt, Load); 258 COMP(ListComp) 259 COMP(SetComp) 260 COMP(GeneratorExp) 261 #undef COMP 262 case DictComp_kind: 263 return validate_comprehension(exp->v.DictComp.generators) && 264 validate_expr(exp->v.DictComp.key, Load) && 265 validate_expr(exp->v.DictComp.value, Load); 266 case Yield_kind: 267 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load); 268 case YieldFrom_kind: 269 return validate_expr(exp->v.YieldFrom.value, Load); 270 case Await_kind: 271 return validate_expr(exp->v.Await.value, Load); 272 case Compare_kind: 273 if (!asdl_seq_LEN(exp->v.Compare.comparators)) { 274 PyErr_SetString(PyExc_ValueError, "Compare with no comparators"); 275 return 0; 276 } 277 if (asdl_seq_LEN(exp->v.Compare.comparators) != 278 asdl_seq_LEN(exp->v.Compare.ops)) { 279 PyErr_SetString(PyExc_ValueError, "Compare has a different number " 280 "of comparators and operands"); 281 return 0; 282 } 283 return validate_exprs(exp->v.Compare.comparators, Load, 0) && 284 validate_expr(exp->v.Compare.left, Load); 285 case Call_kind: 286 return validate_expr(exp->v.Call.func, Load) && 287 validate_exprs(exp->v.Call.args, Load, 0) && 288 validate_keywords(exp->v.Call.keywords); 289 case Constant_kind: 290 if (!validate_constant(exp->v.Constant.value)) { 291 PyErr_Format(PyExc_TypeError, 292 "got an invalid type in Constant: %s", 293 Py_TYPE(exp->v.Constant.value)->tp_name); 294 return 0; 295 } 296 return 1; 297 case Num_kind: { 298 PyObject *n = exp->v.Num.n; 299 if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) && 300 !PyComplex_CheckExact(n)) { 301 PyErr_SetString(PyExc_TypeError, "non-numeric type in Num"); 302 
return 0; 303 } 304 return 1; 305 } 306 case Str_kind: { 307 PyObject *s = exp->v.Str.s; 308 if (!PyUnicode_CheckExact(s)) { 309 PyErr_SetString(PyExc_TypeError, "non-string type in Str"); 310 return 0; 311 } 312 return 1; 313 } 314 case JoinedStr_kind: 315 return validate_exprs(exp->v.JoinedStr.values, Load, 0); 316 case FormattedValue_kind: 317 if (validate_expr(exp->v.FormattedValue.value, Load) == 0) 318 return 0; 319 if (exp->v.FormattedValue.format_spec) 320 return validate_expr(exp->v.FormattedValue.format_spec, Load); 321 return 1; 322 case Bytes_kind: { 323 PyObject *b = exp->v.Bytes.s; 324 if (!PyBytes_CheckExact(b)) { 325 PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes"); 326 return 0; 327 } 328 return 1; 329 } 330 case Attribute_kind: 331 return validate_expr(exp->v.Attribute.value, Load); 332 case Subscript_kind: 333 return validate_slice(exp->v.Subscript.slice) && 334 validate_expr(exp->v.Subscript.value, Load); 335 case Starred_kind: 336 return validate_expr(exp->v.Starred.value, ctx); 337 case List_kind: 338 return validate_exprs(exp->v.List.elts, ctx, 0); 339 case Tuple_kind: 340 return validate_exprs(exp->v.Tuple.elts, ctx, 0); 341 /* These last cases don't have any checking. */ 342 case Name_kind: 343 case NameConstant_kind: 344 case Ellipsis_kind: 345 return 1; 346 default: 347 PyErr_SetString(PyExc_SystemError, "unexpected expression"); 348 return 0; 349 } 350 } 351 352 static int 353 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner) 354 { 355 if (asdl_seq_LEN(seq)) 356 return 1; 357 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner); 358 return 0; 359 } 360 361 static int 362 validate_assignlist(asdl_seq *targets, expr_context_ty ctx) 363 { 364 return validate_nonempty_seq(targets, "targets", ctx == Del ? 
"Delete" : "Assign") && 365 validate_exprs(targets, ctx, 0); 366 } 367 368 static int 369 validate_body(asdl_seq *body, const char *owner) 370 { 371 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body); 372 } 373 374 static int 375 validate_stmt(stmt_ty stmt) 376 { 377 int i; 378 switch (stmt->kind) { 379 case FunctionDef_kind: 380 return validate_body(stmt->v.FunctionDef.body, "FunctionDef") && 381 validate_arguments(stmt->v.FunctionDef.args) && 382 validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) && 383 (!stmt->v.FunctionDef.returns || 384 validate_expr(stmt->v.FunctionDef.returns, Load)); 385 case ClassDef_kind: 386 return validate_body(stmt->v.ClassDef.body, "ClassDef") && 387 validate_exprs(stmt->v.ClassDef.bases, Load, 0) && 388 validate_keywords(stmt->v.ClassDef.keywords) && 389 validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0); 390 case Return_kind: 391 return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load); 392 case Delete_kind: 393 return validate_assignlist(stmt->v.Delete.targets, Del); 394 case Assign_kind: 395 return validate_assignlist(stmt->v.Assign.targets, Store) && 396 validate_expr(stmt->v.Assign.value, Load); 397 case AugAssign_kind: 398 return validate_expr(stmt->v.AugAssign.target, Store) && 399 validate_expr(stmt->v.AugAssign.value, Load); 400 case AnnAssign_kind: 401 if (stmt->v.AnnAssign.target->kind != Name_kind && 402 stmt->v.AnnAssign.simple) { 403 PyErr_SetString(PyExc_TypeError, 404 "AnnAssign with simple non-Name target"); 405 return 0; 406 } 407 return validate_expr(stmt->v.AnnAssign.target, Store) && 408 (!stmt->v.AnnAssign.value || 409 validate_expr(stmt->v.AnnAssign.value, Load)) && 410 validate_expr(stmt->v.AnnAssign.annotation, Load); 411 case For_kind: 412 return validate_expr(stmt->v.For.target, Store) && 413 validate_expr(stmt->v.For.iter, Load) && 414 validate_body(stmt->v.For.body, "For") && 415 validate_stmts(stmt->v.For.orelse); 416 case AsyncFor_kind: 417 return 
validate_expr(stmt->v.AsyncFor.target, Store) && 418 validate_expr(stmt->v.AsyncFor.iter, Load) && 419 validate_body(stmt->v.AsyncFor.body, "AsyncFor") && 420 validate_stmts(stmt->v.AsyncFor.orelse); 421 case While_kind: 422 return validate_expr(stmt->v.While.test, Load) && 423 validate_body(stmt->v.While.body, "While") && 424 validate_stmts(stmt->v.While.orelse); 425 case If_kind: 426 return validate_expr(stmt->v.If.test, Load) && 427 validate_body(stmt->v.If.body, "If") && 428 validate_stmts(stmt->v.If.orelse); 429 case With_kind: 430 if (!validate_nonempty_seq(stmt->v.With.items, "items", "With")) 431 return 0; 432 for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) { 433 withitem_ty item = asdl_seq_GET(stmt->v.With.items, i); 434 if (!validate_expr(item->context_expr, Load) || 435 (item->optional_vars && !validate_expr(item->optional_vars, Store))) 436 return 0; 437 } 438 return validate_body(stmt->v.With.body, "With"); 439 case AsyncWith_kind: 440 if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith")) 441 return 0; 442 for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) { 443 withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i); 444 if (!validate_expr(item->context_expr, Load) || 445 (item->optional_vars && !validate_expr(item->optional_vars, Store))) 446 return 0; 447 } 448 return validate_body(stmt->v.AsyncWith.body, "AsyncWith"); 449 case Raise_kind: 450 if (stmt->v.Raise.exc) { 451 return validate_expr(stmt->v.Raise.exc, Load) && 452 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load)); 453 } 454 if (stmt->v.Raise.cause) { 455 PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception"); 456 return 0; 457 } 458 return 1; 459 case Try_kind: 460 if (!validate_body(stmt->v.Try.body, "Try")) 461 return 0; 462 if (!asdl_seq_LEN(stmt->v.Try.handlers) && 463 !asdl_seq_LEN(stmt->v.Try.finalbody)) { 464 PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody"); 465 return 0; 466 
} 467 if (!asdl_seq_LEN(stmt->v.Try.handlers) && 468 asdl_seq_LEN(stmt->v.Try.orelse)) { 469 PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers"); 470 return 0; 471 } 472 for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) { 473 excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i); 474 if ((handler->v.ExceptHandler.type && 475 !validate_expr(handler->v.ExceptHandler.type, Load)) || 476 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler")) 477 return 0; 478 } 479 return (!asdl_seq_LEN(stmt->v.Try.finalbody) || 480 validate_stmts(stmt->v.Try.finalbody)) && 481 (!asdl_seq_LEN(stmt->v.Try.orelse) || 482 validate_stmts(stmt->v.Try.orelse)); 483 case Assert_kind: 484 return validate_expr(stmt->v.Assert.test, Load) && 485 (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load)); 486 case Import_kind: 487 return validate_nonempty_seq(stmt->v.Import.names, "names", "Import"); 488 case ImportFrom_kind: 489 if (stmt->v.ImportFrom.level < 0) { 490 PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level"); 491 return 0; 492 } 493 return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom"); 494 case Global_kind: 495 return validate_nonempty_seq(stmt->v.Global.names, "names", "Global"); 496 case Nonlocal_kind: 497 return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal"); 498 case Expr_kind: 499 return validate_expr(stmt->v.Expr.value, Load); 500 case AsyncFunctionDef_kind: 501 return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") && 502 validate_arguments(stmt->v.AsyncFunctionDef.args) && 503 validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) && 504 (!stmt->v.AsyncFunctionDef.returns || 505 validate_expr(stmt->v.AsyncFunctionDef.returns, Load)); 506 case Pass_kind: 507 case Break_kind: 508 case Continue_kind: 509 return 1; 510 default: 511 PyErr_SetString(PyExc_SystemError, "unexpected statement"); 512 return 0; 513 } 514 } 515 516 static int 517 
validate_stmts(asdl_seq *seq) 518 { 519 int i; 520 for (i = 0; i < asdl_seq_LEN(seq); i++) { 521 stmt_ty stmt = asdl_seq_GET(seq, i); 522 if (stmt) { 523 if (!validate_stmt(stmt)) 524 return 0; 525 } 526 else { 527 PyErr_SetString(PyExc_ValueError, 528 "None disallowed in statement list"); 529 return 0; 530 } 531 } 532 return 1; 533 } 534 535 static int 536 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok) 537 { 538 int i; 539 for (i = 0; i < asdl_seq_LEN(exprs); i++) { 540 expr_ty expr = asdl_seq_GET(exprs, i); 541 if (expr) { 542 if (!validate_expr(expr, ctx)) 543 return 0; 544 } 545 else if (!null_ok) { 546 PyErr_SetString(PyExc_ValueError, 547 "None disallowed in expression list"); 548 return 0; 549 } 550 551 } 552 return 1; 553 } 554 555 int 556 PyAST_Validate(mod_ty mod) 557 { 558 int res = 0; 559 560 switch (mod->kind) { 561 case Module_kind: 562 res = validate_stmts(mod->v.Module.body); 563 break; 564 case Interactive_kind: 565 res = validate_stmts(mod->v.Interactive.body); 566 break; 567 case Expression_kind: 568 res = validate_expr(mod->v.Expression.body, Load); 569 break; 570 case Suite_kind: 571 PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler"); 572 break; 573 default: 574 PyErr_SetString(PyExc_SystemError, "impossible module node"); 575 res = 0; 576 break; 577 } 578 return res; 579 } 580 581 /* This is done here, so defines like "test" don't interfere with AST use above. */ 582 #include "grammar.h" 583 #include "parsetok.h" 584 #include "graminit.h" 585 586 /* Data structure used internally */ 587 struct compiling { 588 PyArena *c_arena; /* Arena for allocating memory. */ 589 PyObject *c_filename; /* filename */ 590 PyObject *c_normalize; /* Normalization function from unicodedata. */ 591 PyObject *c_normalize_args; /* Normalization argument tuple. 
*/ 592 }; 593 594 static asdl_seq *seq_for_testlist(struct compiling *, const node *); 595 static expr_ty ast_for_expr(struct compiling *, const node *); 596 static stmt_ty ast_for_stmt(struct compiling *, const node *); 597 static asdl_seq *ast_for_suite(struct compiling *, const node *); 598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *, 599 expr_context_ty); 600 static expr_ty ast_for_testlist(struct compiling *, const node *); 601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *); 602 603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, int); 604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, int); 605 606 /* Note different signature for ast_for_call */ 607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); 608 609 static PyObject *parsenumber(struct compiling *, const char *); 610 static expr_ty parsestrplus(struct compiling *, const node *n); 611 612 #define COMP_GENEXP 0 613 #define COMP_LISTCOMP 1 614 #define COMP_SETCOMP 2 615 616 static int 617 init_normalization(struct compiling *c) 618 { 619 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); 620 if (!m) 621 return 0; 622 c->c_normalize = PyObject_GetAttrString(m, "normalize"); 623 Py_DECREF(m); 624 if (!c->c_normalize) 625 return 0; 626 c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None); 627 if (!c->c_normalize_args) { 628 Py_CLEAR(c->c_normalize); 629 return 0; 630 } 631 PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL); 632 return 1; 633 } 634 635 static identifier 636 new_identifier(const char *n, struct compiling *c) 637 { 638 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); 639 if (!id) 640 return NULL; 641 /* PyUnicode_DecodeUTF8 should always return a ready string. */ 642 assert(PyUnicode_IS_READY(id)); 643 /* Check whether there are non-ASCII characters in the 644 identifier; if so, normalize to NFKC. 
*/ 645 if (!PyUnicode_IS_ASCII(id)) { 646 PyObject *id2; 647 if (!c->c_normalize && !init_normalization(c)) { 648 Py_DECREF(id); 649 return NULL; 650 } 651 PyTuple_SET_ITEM(c->c_normalize_args, 1, id); 652 id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL); 653 Py_DECREF(id); 654 if (!id2) 655 return NULL; 656 id = id2; 657 } 658 PyUnicode_InternInPlace(&id); 659 if (PyArena_AddPyObject(c->c_arena, id) < 0) { 660 Py_DECREF(id); 661 return NULL; 662 } 663 return id; 664 } 665 666 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c) 667 668 static int 669 ast_error(struct compiling *c, const node *n, const char *errmsg) 670 { 671 PyObject *value, *errstr, *loc, *tmp; 672 673 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n)); 674 if (!loc) { 675 Py_INCREF(Py_None); 676 loc = Py_None; 677 } 678 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc); 679 if (!tmp) 680 return 0; 681 errstr = PyUnicode_FromString(errmsg); 682 if (!errstr) { 683 Py_DECREF(tmp); 684 return 0; 685 } 686 value = PyTuple_Pack(2, errstr, tmp); 687 Py_DECREF(errstr); 688 Py_DECREF(tmp); 689 if (value) { 690 PyErr_SetObject(PyExc_SyntaxError, value); 691 Py_DECREF(value); 692 } 693 return 0; 694 } 695 696 /* num_stmts() returns number of contained statements. 697 698 Use this routine to determine how big a sequence is needed for 699 the statements in a parse tree. Its raison d'etre is this bit of 700 grammar: 701 702 stmt: simple_stmt | compound_stmt 703 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 704 705 A simple_stmt can contain multiple small_stmt elements joined 706 by semicolons. If the arg is a simple_stmt, the number of 707 small_stmt elements is returned. 
708 */ 709 710 static int 711 num_stmts(const node *n) 712 { 713 int i, l; 714 node *ch; 715 716 switch (TYPE(n)) { 717 case single_input: 718 if (TYPE(CHILD(n, 0)) == NEWLINE) 719 return 0; 720 else 721 return num_stmts(CHILD(n, 0)); 722 case file_input: 723 l = 0; 724 for (i = 0; i < NCH(n); i++) { 725 ch = CHILD(n, i); 726 if (TYPE(ch) == stmt) 727 l += num_stmts(ch); 728 } 729 return l; 730 case stmt: 731 return num_stmts(CHILD(n, 0)); 732 case compound_stmt: 733 return 1; 734 case simple_stmt: 735 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */ 736 case suite: 737 if (NCH(n) == 1) 738 return num_stmts(CHILD(n, 0)); 739 else { 740 l = 0; 741 for (i = 2; i < (NCH(n) - 1); i++) 742 l += num_stmts(CHILD(n, i)); 743 return l; 744 } 745 default: { 746 char buf[128]; 747 748 sprintf(buf, "Non-statement found: %d %d", 749 TYPE(n), NCH(n)); 750 Py_FatalError(buf); 751 } 752 } 753 assert(0); 754 return 0; 755 } 756 757 /* Transform the CST rooted at node * to the appropriate AST 758 */ 759 760 mod_ty 761 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags, 762 PyObject *filename, PyArena *arena) 763 { 764 int i, j, k, num; 765 asdl_seq *stmts = NULL; 766 stmt_ty s; 767 node *ch; 768 struct compiling c; 769 mod_ty res = NULL; 770 771 c.c_arena = arena; 772 /* borrowed reference */ 773 c.c_filename = filename; 774 c.c_normalize = NULL; 775 c.c_normalize_args = NULL; 776 777 if (TYPE(n) == encoding_decl) 778 n = CHILD(n, 0); 779 780 k = 0; 781 switch (TYPE(n)) { 782 case file_input: 783 stmts = _Py_asdl_seq_new(num_stmts(n), arena); 784 if (!stmts) 785 goto out; 786 for (i = 0; i < NCH(n) - 1; i++) { 787 ch = CHILD(n, i); 788 if (TYPE(ch) == NEWLINE) 789 continue; 790 REQ(ch, stmt); 791 num = num_stmts(ch); 792 if (num == 1) { 793 s = ast_for_stmt(&c, ch); 794 if (!s) 795 goto out; 796 asdl_seq_SET(stmts, k++, s); 797 } 798 else { 799 ch = CHILD(ch, 0); 800 REQ(ch, simple_stmt); 801 for (j = 0; j < num; j++) { 802 s = ast_for_stmt(&c, CHILD(ch, j 
* 2)); 803 if (!s) 804 goto out; 805 asdl_seq_SET(stmts, k++, s); 806 } 807 } 808 } 809 res = Module(stmts, arena); 810 break; 811 case eval_input: { 812 expr_ty testlist_ast; 813 814 /* XXX Why not comp_for here? */ 815 testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); 816 if (!testlist_ast) 817 goto out; 818 res = Expression(testlist_ast, arena); 819 break; 820 } 821 case single_input: 822 if (TYPE(CHILD(n, 0)) == NEWLINE) { 823 stmts = _Py_asdl_seq_new(1, arena); 824 if (!stmts) 825 goto out; 826 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset, 827 arena)); 828 if (!asdl_seq_GET(stmts, 0)) 829 goto out; 830 res = Interactive(stmts, arena); 831 } 832 else { 833 n = CHILD(n, 0); 834 num = num_stmts(n); 835 stmts = _Py_asdl_seq_new(num, arena); 836 if (!stmts) 837 goto out; 838 if (num == 1) { 839 s = ast_for_stmt(&c, n); 840 if (!s) 841 goto out; 842 asdl_seq_SET(stmts, 0, s); 843 } 844 else { 845 /* Only a simple_stmt can contain multiple statements. */ 846 REQ(n, simple_stmt); 847 for (i = 0; i < NCH(n); i += 2) { 848 if (TYPE(CHILD(n, i)) == NEWLINE) 849 break; 850 s = ast_for_stmt(&c, CHILD(n, i)); 851 if (!s) 852 goto out; 853 asdl_seq_SET(stmts, i / 2, s); 854 } 855 } 856 857 res = Interactive(stmts, arena); 858 } 859 break; 860 default: 861 PyErr_Format(PyExc_SystemError, 862 "invalid node %d for PyAST_FromNode", TYPE(n)); 863 goto out; 864 } 865 out: 866 if (c.c_normalize) { 867 Py_DECREF(c.c_normalize); 868 PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL); 869 Py_DECREF(c.c_normalize_args); 870 } 871 return res; 872 } 873 874 mod_ty 875 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str, 876 PyArena *arena) 877 { 878 mod_ty mod; 879 PyObject *filename; 880 filename = PyUnicode_DecodeFSDefault(filename_str); 881 if (filename == NULL) 882 return NULL; 883 mod = PyAST_FromNodeObject(n, flags, filename, arena); 884 Py_DECREF(filename); 885 return mod; 886 887 } 888 889 /* Return the AST repr. 
of the operator represented as syntax (|, ^, etc.) 890 */ 891 892 static operator_ty 893 get_operator(const node *n) 894 { 895 switch (TYPE(n)) { 896 case VBAR: 897 return BitOr; 898 case CIRCUMFLEX: 899 return BitXor; 900 case AMPER: 901 return BitAnd; 902 case LEFTSHIFT: 903 return LShift; 904 case RIGHTSHIFT: 905 return RShift; 906 case PLUS: 907 return Add; 908 case MINUS: 909 return Sub; 910 case STAR: 911 return Mult; 912 case AT: 913 return MatMult; 914 case SLASH: 915 return Div; 916 case DOUBLESLASH: 917 return FloorDiv; 918 case PERCENT: 919 return Mod; 920 default: 921 return (operator_ty)0; 922 } 923 } 924 925 static const char * const FORBIDDEN[] = { 926 "None", 927 "True", 928 "False", 929 NULL, 930 }; 931 932 static int 933 forbidden_name(struct compiling *c, identifier name, const node *n, 934 int full_checks) 935 { 936 assert(PyUnicode_Check(name)); 937 if (_PyUnicode_EqualToASCIIString(name, "__debug__")) { 938 ast_error(c, n, "assignment to keyword"); 939 return 1; 940 } 941 if (_PyUnicode_EqualToASCIIString(name, "async") || 942 _PyUnicode_EqualToASCIIString(name, "await")) 943 { 944 PyObject *message = PyUnicode_FromString( 945 "'async' and 'await' will become reserved keywords" 946 " in Python 3.7"); 947 int ret; 948 if (message == NULL) { 949 return 1; 950 } 951 ret = PyErr_WarnExplicitObject( 952 PyExc_DeprecationWarning, 953 message, 954 c->c_filename, 955 LINENO(n), 956 NULL, 957 NULL); 958 Py_DECREF(message); 959 if (ret < 0) { 960 return 1; 961 } 962 } 963 if (full_checks) { 964 const char * const *p; 965 for (p = FORBIDDEN; *p; p++) { 966 if (_PyUnicode_EqualToASCIIString(name, *p)) { 967 ast_error(c, n, "assignment to keyword"); 968 return 1; 969 } 970 } 971 } 972 return 0; 973 } 974 975 /* Set the context ctx for expr_ty e, recursively traversing e. 976 977 Only sets context for expr kinds that "can appear in assignment context" 978 (according to ../Parser/Python.asdl). 
For other expr kinds, it sets 979 an appropriate syntax error and returns false. 980 */ 981 982 static int 983 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n) 984 { 985 asdl_seq *s = NULL; 986 /* If a particular expression type can't be used for assign / delete, 987 set expr_name to its name and an error message will be generated. 988 */ 989 const char* expr_name = NULL; 990 991 /* The ast defines augmented store and load contexts, but the 992 implementation here doesn't actually use them. The code may be 993 a little more complex than necessary as a result. It also means 994 that expressions in an augmented assignment have a Store context. 995 Consider restructuring so that augmented assignment uses 996 set_context(), too. 997 */ 998 assert(ctx != AugStore && ctx != AugLoad); 999 1000 switch (e->kind) { 1001 case Attribute_kind: 1002 e->v.Attribute.ctx = ctx; 1003 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1)) 1004 return 0; 1005 break; 1006 case Subscript_kind: 1007 e->v.Subscript.ctx = ctx; 1008 break; 1009 case Starred_kind: 1010 e->v.Starred.ctx = ctx; 1011 if (!set_context(c, e->v.Starred.value, ctx, n)) 1012 return 0; 1013 break; 1014 case Name_kind: 1015 if (ctx == Store) { 1016 if (forbidden_name(c, e->v.Name.id, n, 0)) 1017 return 0; /* forbidden_name() calls ast_error() */ 1018 } 1019 e->v.Name.ctx = ctx; 1020 break; 1021 case List_kind: 1022 e->v.List.ctx = ctx; 1023 s = e->v.List.elts; 1024 break; 1025 case Tuple_kind: 1026 e->v.Tuple.ctx = ctx; 1027 s = e->v.Tuple.elts; 1028 break; 1029 case Lambda_kind: 1030 expr_name = "lambda"; 1031 break; 1032 case Call_kind: 1033 expr_name = "function call"; 1034 break; 1035 case BoolOp_kind: 1036 case BinOp_kind: 1037 case UnaryOp_kind: 1038 expr_name = "operator"; 1039 break; 1040 case GeneratorExp_kind: 1041 expr_name = "generator expression"; 1042 break; 1043 case Yield_kind: 1044 case YieldFrom_kind: 1045 expr_name = "yield expression"; 1046 break; 1047 case 
Await_kind: 1048 expr_name = "await expression"; 1049 break; 1050 case ListComp_kind: 1051 expr_name = "list comprehension"; 1052 break; 1053 case SetComp_kind: 1054 expr_name = "set comprehension"; 1055 break; 1056 case DictComp_kind: 1057 expr_name = "dict comprehension"; 1058 break; 1059 case Dict_kind: 1060 case Set_kind: 1061 case Num_kind: 1062 case Str_kind: 1063 case Bytes_kind: 1064 case JoinedStr_kind: 1065 case FormattedValue_kind: 1066 expr_name = "literal"; 1067 break; 1068 case NameConstant_kind: 1069 expr_name = "keyword"; 1070 break; 1071 case Ellipsis_kind: 1072 expr_name = "Ellipsis"; 1073 break; 1074 case Compare_kind: 1075 expr_name = "comparison"; 1076 break; 1077 case IfExp_kind: 1078 expr_name = "conditional expression"; 1079 break; 1080 default: 1081 PyErr_Format(PyExc_SystemError, 1082 "unexpected expression in assignment %d (line %d)", 1083 e->kind, e->lineno); 1084 return 0; 1085 } 1086 /* Check for error string set by switch */ 1087 if (expr_name) { 1088 char buf[300]; 1089 PyOS_snprintf(buf, sizeof(buf), 1090 "can't %s %s", 1091 ctx == Store ? "assign to" : "delete", 1092 expr_name); 1093 return ast_error(c, n, buf); 1094 } 1095 1096 /* If the LHS is a list or tuple, we need to set the assignment 1097 context for all the contained elements. 
1098 */ 1099 if (s) { 1100 int i; 1101 1102 for (i = 0; i < asdl_seq_LEN(s); i++) { 1103 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n)) 1104 return 0; 1105 } 1106 } 1107 return 1; 1108 } 1109 1110 static operator_ty 1111 ast_for_augassign(struct compiling *c, const node *n) 1112 { 1113 REQ(n, augassign); 1114 n = CHILD(n, 0); 1115 switch (STR(n)[0]) { 1116 case '+': 1117 return Add; 1118 case '-': 1119 return Sub; 1120 case '/': 1121 if (STR(n)[1] == '/') 1122 return FloorDiv; 1123 else 1124 return Div; 1125 case '%': 1126 return Mod; 1127 case '<': 1128 return LShift; 1129 case '>': 1130 return RShift; 1131 case '&': 1132 return BitAnd; 1133 case '^': 1134 return BitXor; 1135 case '|': 1136 return BitOr; 1137 case '*': 1138 if (STR(n)[1] == '*') 1139 return Pow; 1140 else 1141 return Mult; 1142 case '@': 1143 return MatMult; 1144 default: 1145 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n)); 1146 return (operator_ty)0; 1147 } 1148 } 1149 1150 static cmpop_ty 1151 ast_for_comp_op(struct compiling *c, const node *n) 1152 { 1153 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is' 1154 |'is' 'not' 1155 */ 1156 REQ(n, comp_op); 1157 if (NCH(n) == 1) { 1158 n = CHILD(n, 0); 1159 switch (TYPE(n)) { 1160 case LESS: 1161 return Lt; 1162 case GREATER: 1163 return Gt; 1164 case EQEQUAL: /* == */ 1165 return Eq; 1166 case LESSEQUAL: 1167 return LtE; 1168 case GREATEREQUAL: 1169 return GtE; 1170 case NOTEQUAL: 1171 return NotEq; 1172 case NAME: 1173 if (strcmp(STR(n), "in") == 0) 1174 return In; 1175 if (strcmp(STR(n), "is") == 0) 1176 return Is; 1177 default: 1178 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s", 1179 STR(n)); 1180 return (cmpop_ty)0; 1181 } 1182 } 1183 else if (NCH(n) == 2) { 1184 /* handle "not in" and "is not" */ 1185 switch (TYPE(CHILD(n, 0))) { 1186 case NAME: 1187 if (strcmp(STR(CHILD(n, 1)), "in") == 0) 1188 return NotIn; 1189 if (strcmp(STR(CHILD(n, 0)), "is") == 0) 1190 return IsNot; 1191 default: 1192 
PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s", 1193 STR(CHILD(n, 0)), STR(CHILD(n, 1))); 1194 return (cmpop_ty)0; 1195 } 1196 } 1197 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children", 1198 NCH(n)); 1199 return (cmpop_ty)0; 1200 } 1201 1202 static asdl_seq * 1203 seq_for_testlist(struct compiling *c, const node *n) 1204 { 1205 /* testlist: test (',' test)* [','] 1206 testlist_star_expr: test|star_expr (',' test|star_expr)* [','] 1207 */ 1208 asdl_seq *seq; 1209 expr_ty expression; 1210 int i; 1211 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp); 1212 1213 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 1214 if (!seq) 1215 return NULL; 1216 1217 for (i = 0; i < NCH(n); i += 2) { 1218 const node *ch = CHILD(n, i); 1219 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr); 1220 1221 expression = ast_for_expr(c, ch); 1222 if (!expression) 1223 return NULL; 1224 1225 assert(i / 2 < seq->size); 1226 asdl_seq_SET(seq, i / 2, expression); 1227 } 1228 return seq; 1229 } 1230 1231 static arg_ty 1232 ast_for_arg(struct compiling *c, const node *n) 1233 { 1234 identifier name; 1235 expr_ty annotation = NULL; 1236 node *ch; 1237 arg_ty ret; 1238 1239 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef); 1240 ch = CHILD(n, 0); 1241 name = NEW_IDENTIFIER(ch); 1242 if (!name) 1243 return NULL; 1244 if (forbidden_name(c, name, ch, 0)) 1245 return NULL; 1246 1247 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) { 1248 annotation = ast_for_expr(c, CHILD(n, 2)); 1249 if (!annotation) 1250 return NULL; 1251 } 1252 1253 ret = arg(name, annotation, LINENO(n), n->n_col_offset, c->c_arena); 1254 if (!ret) 1255 return NULL; 1256 return ret; 1257 } 1258 1259 /* returns -1 if failed to handle keyword only arguments 1260 returns new position to keep processing if successful 1261 (',' tfpdef ['=' test])* 1262 ^^^ 1263 start pointing here 1264 */ 1265 static int 1266 handle_keywordonly_args(struct compiling 
/* Create AST for argument list.
 *
 * n is either a `parameters` node (function definition) or directly a
 * typedargslist / varargslist node.  For `parameters` with only two
 * children, i.e. "()", an arguments node with all-NULL fields is returned;
 * otherwise the list is taken from child 1.
 *
 * The work is done in two passes over the children of the list node:
 *   1. count positional args, positional defaults and keyword-only args so
 *      that the four sequences can be allocated with the right size;
 *   2. walk the children again and fill the sequences, delegating the
 *      keyword-only part to handle_keywordonly_args().
 *
 * Returns the arguments_ty built by arguments(), or NULL on failure.
 * Failures raised here go through ast_error() ("more than 255 arguments",
 * "non-default argument follows default argument", "named arguments must
 * follow bare *") or set SystemError for an unexpected child node.
 */

static arguments_ty
ast_for_arguments(struct compiling *c, const node *n)
{
    /* This function handles both typedargslist (function definition)
       and varargslist (lambda definition).

       parameters: '(' [typedargslist] ')'
       typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
               '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
             | '**' tfpdef [',']]]
         | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
         | '**' tfpdef [','])
       tfpdef: NAME [':' test]
       varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
               '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
             | '**' vfpdef [',']]]
         | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
         | '**' vfpdef [',']
       )
       vfpdef: NAME

    */
    int i, j, k, nposargs = 0, nkwonlyargs = 0;
    int nposdefaults = 0, found_default = 0;
    asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
    arg_ty vararg = NULL, kwarg = NULL;
    arg_ty arg;
    node *ch;

    if (TYPE(n) == parameters) {
        if (NCH(n) == 2) /* () as argument list */
            return arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
        n = CHILD(n, 1);
    }
    assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);

    /* First count the number of positional args & defaults.  The
       variable i is the loop index for this for loop and the next.
       The next loop picks up where the first leaves off.
    */
    for (i = 0; i < NCH(n); i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == STAR) {
            /* skip star */
            i++;
            if (i < NCH(n) && /* skip argument following star */
                (TYPE(CHILD(n, i)) == tfpdef ||
                 TYPE(CHILD(n, i)) == vfpdef)) {
                i++;
            }
            break;
        }
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
        if (TYPE(ch) == EQUAL) nposdefaults++;
    }
    /* count the number of keyword only args &
       defaults for keyword only args */
    for ( ; i < NCH(n); ++i) {
        ch = CHILD(n, i);
        if (TYPE(ch) == DOUBLESTAR) break;
        if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
    }
    /* Each sequence is only allocated when its count is non-zero; a NULL
       pointer together with a non-zero count therefore means the
       allocation failed. */
    posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
    if (!posargs && nposargs)
        return NULL;
    kwonlyargs = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwonlyargs && nkwonlyargs)
        return NULL;
    posdefaults = (nposdefaults ?
                    _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
    if (!posdefaults && nposdefaults)
        return NULL;
    /* kwonlyargs and kwdefaults have the same length: a keyword-only
       argument without a default gets a NULL entry, because we only have
       a sequence data structure here, not a dictionary. */
    kwdefaults = (nkwonlyargs ?
                   _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
    if (!kwdefaults && nkwonlyargs)
        return NULL;

    if (nposargs + nkwonlyargs > 255) {
        ast_error(c, n, "more than 255 arguments");
        return NULL;
    }

    /* tfpdef: NAME [':' test]
       vfpdef: NAME
    */
    i = 0;
    j = 0;  /* index for defaults */
    k = 0;  /* index for args */
    while (i < NCH(n)) {
        ch = CHILD(n, i);
        switch (TYPE(ch)) {
            case tfpdef:
            case vfpdef:
                /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
                   anything other than EQUAL or a comma? */
                /* XXX Should NCH(n) check be made a separate check? */
                if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
                    expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
                    if (!expression)
                        return NULL;
                    assert(posdefaults != NULL);
                    asdl_seq_SET(posdefaults, j++, expression);
                    i += 2; /* the '=' and the default expression */
                    found_default = 1;
                }
                else if (found_default) {
                    /* an earlier positional argument had a default but
                       this one does not */
                    ast_error(c, n,
                             "non-default argument follows default argument");
                    return NULL;
                }
                arg = ast_for_arg(c, ch);
                if (!arg)
                    return NULL;
                asdl_seq_SET(posargs, k++, arg);
                i += 2; /* the name and the comma */
                break;
            case STAR:
                /* '*' as the very last child, or followed only by a
                   trailing comma, has nothing to apply to */
                if (i+1 >= NCH(n) ||
                    (i+2 == NCH(n) && TYPE(CHILD(n, i+1)) == COMMA)) {
                    ast_error(c, CHILD(n, i),
                        "named arguments must follow bare *");
                    return NULL;
                }
                ch = CHILD(n, i+1);  /* tfpdef or COMMA */
                if (TYPE(ch) == COMMA) {
                    /* bare '*': everything after it is keyword-only */
                    int res = 0;
                    i += 2; /* now follows keyword only arguments */
                    res = handle_keywordonly_args(c, n, i,
                                                  kwonlyargs, kwdefaults);
                    if (res == -1) return NULL;
                    i = res; /* res has new position to process */
                }
                else {
                    /* '*name': name becomes the vararg */
                    vararg = ast_for_arg(c, ch);
                    if (!vararg)
                        return NULL;

                    i += 3; /* the '*', the name and the comma */
                    if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
                                    || TYPE(CHILD(n, i)) == vfpdef)) {
                        int res = 0;
                        res = handle_keywordonly_args(c, n, i,
                                                      kwonlyargs, kwdefaults);
                        if (res == -1) return NULL;
                        i = res; /* res has new position to process */
                    }
                }
                break;
            case DOUBLESTAR:
                ch = CHILD(n, i+1);  /* tfpdef */
                assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
                kwarg = ast_for_arg(c, ch);
                if (!kwarg)
                    return NULL;
                i += 3; /* the '**', the name and an optional comma */
                break;
            default:
                PyErr_Format(PyExc_SystemError,
                             "unexpected node in varargslist: %d @ %d",
                             TYPE(ch), i);
                return NULL;
        }
    }
    return arguments(posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
}
decorator_seq; 1590 } 1591 1592 static stmt_ty 1593 ast_for_funcdef_impl(struct compiling *c, const node *n, 1594 asdl_seq *decorator_seq, int is_async) 1595 { 1596 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */ 1597 identifier name; 1598 arguments_ty args; 1599 asdl_seq *body; 1600 expr_ty returns = NULL; 1601 int name_i = 1; 1602 1603 REQ(n, funcdef); 1604 1605 name = NEW_IDENTIFIER(CHILD(n, name_i)); 1606 if (!name) 1607 return NULL; 1608 if (forbidden_name(c, name, CHILD(n, name_i), 0)) 1609 return NULL; 1610 args = ast_for_arguments(c, CHILD(n, name_i + 1)); 1611 if (!args) 1612 return NULL; 1613 if (TYPE(CHILD(n, name_i+2)) == RARROW) { 1614 returns = ast_for_expr(c, CHILD(n, name_i + 3)); 1615 if (!returns) 1616 return NULL; 1617 name_i += 2; 1618 } 1619 body = ast_for_suite(c, CHILD(n, name_i + 3)); 1620 if (!body) 1621 return NULL; 1622 1623 if (is_async) 1624 return AsyncFunctionDef(name, args, body, decorator_seq, returns, 1625 LINENO(n), 1626 n->n_col_offset, c->c_arena); 1627 else 1628 return FunctionDef(name, args, body, decorator_seq, returns, 1629 LINENO(n), 1630 n->n_col_offset, c->c_arena); 1631 } 1632 1633 static stmt_ty 1634 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) 1635 { 1636 /* async_funcdef: ASYNC funcdef */ 1637 REQ(n, async_funcdef); 1638 REQ(CHILD(n, 0), ASYNC); 1639 REQ(CHILD(n, 1), funcdef); 1640 1641 return ast_for_funcdef_impl(c, CHILD(n, 1), decorator_seq, 1642 1 /* is_async */); 1643 } 1644 1645 static stmt_ty 1646 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) 1647 { 1648 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */ 1649 return ast_for_funcdef_impl(c, n, decorator_seq, 1650 0 /* is_async */); 1651 } 1652 1653 1654 static stmt_ty 1655 ast_for_async_stmt(struct compiling *c, const node *n) 1656 { 1657 /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */ 1658 REQ(n, async_stmt); 1659 REQ(CHILD(n, 0), ASYNC); 1660 1661 switch 
(TYPE(CHILD(n, 1))) { 1662 case funcdef: 1663 return ast_for_funcdef_impl(c, CHILD(n, 1), NULL, 1664 1 /* is_async */); 1665 case with_stmt: 1666 return ast_for_with_stmt(c, CHILD(n, 1), 1667 1 /* is_async */); 1668 1669 case for_stmt: 1670 return ast_for_for_stmt(c, CHILD(n, 1), 1671 1 /* is_async */); 1672 1673 default: 1674 PyErr_Format(PyExc_SystemError, 1675 "invalid async stament: %s", 1676 STR(CHILD(n, 1))); 1677 return NULL; 1678 } 1679 } 1680 1681 static stmt_ty 1682 ast_for_decorated(struct compiling *c, const node *n) 1683 { 1684 /* decorated: decorators (classdef | funcdef | async_funcdef) */ 1685 stmt_ty thing = NULL; 1686 asdl_seq *decorator_seq = NULL; 1687 1688 REQ(n, decorated); 1689 1690 decorator_seq = ast_for_decorators(c, CHILD(n, 0)); 1691 if (!decorator_seq) 1692 return NULL; 1693 1694 assert(TYPE(CHILD(n, 1)) == funcdef || 1695 TYPE(CHILD(n, 1)) == async_funcdef || 1696 TYPE(CHILD(n, 1)) == classdef); 1697 1698 if (TYPE(CHILD(n, 1)) == funcdef) { 1699 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq); 1700 } else if (TYPE(CHILD(n, 1)) == classdef) { 1701 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq); 1702 } else if (TYPE(CHILD(n, 1)) == async_funcdef) { 1703 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq); 1704 } 1705 /* we count the decorators in when talking about the class' or 1706 * function's line number */ 1707 if (thing) { 1708 thing->lineno = LINENO(n); 1709 thing->col_offset = n->n_col_offset; 1710 } 1711 return thing; 1712 } 1713 1714 static expr_ty 1715 ast_for_lambdef(struct compiling *c, const node *n) 1716 { 1717 /* lambdef: 'lambda' [varargslist] ':' test 1718 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */ 1719 arguments_ty args; 1720 expr_ty expression; 1721 1722 if (NCH(n) == 3) { 1723 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena); 1724 if (!args) 1725 return NULL; 1726 expression = ast_for_expr(c, CHILD(n, 2)); 1727 if (!expression) 1728 return NULL; 1729 } 1730 else 
{ 1731 args = ast_for_arguments(c, CHILD(n, 1)); 1732 if (!args) 1733 return NULL; 1734 expression = ast_for_expr(c, CHILD(n, 3)); 1735 if (!expression) 1736 return NULL; 1737 } 1738 1739 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena); 1740 } 1741 1742 static expr_ty 1743 ast_for_ifexpr(struct compiling *c, const node *n) 1744 { 1745 /* test: or_test 'if' or_test 'else' test */ 1746 expr_ty expression, body, orelse; 1747 1748 assert(NCH(n) == 5); 1749 body = ast_for_expr(c, CHILD(n, 0)); 1750 if (!body) 1751 return NULL; 1752 expression = ast_for_expr(c, CHILD(n, 2)); 1753 if (!expression) 1754 return NULL; 1755 orelse = ast_for_expr(c, CHILD(n, 4)); 1756 if (!orelse) 1757 return NULL; 1758 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset, 1759 c->c_arena); 1760 } 1761 1762 /* 1763 Count the number of 'for' loops in a comprehension. 1764 1765 Helper for ast_for_comprehension(). 1766 */ 1767 1768 static int 1769 count_comp_fors(struct compiling *c, const node *n) 1770 { 1771 int n_fors = 0; 1772 int is_async; 1773 1774 count_comp_for: 1775 is_async = 0; 1776 n_fors++; 1777 REQ(n, comp_for); 1778 if (TYPE(CHILD(n, 0)) == ASYNC) { 1779 is_async = 1; 1780 } 1781 if (NCH(n) == (5 + is_async)) { 1782 n = CHILD(n, 4 + is_async); 1783 } 1784 else { 1785 return n_fors; 1786 } 1787 count_comp_iter: 1788 REQ(n, comp_iter); 1789 n = CHILD(n, 0); 1790 if (TYPE(n) == comp_for) 1791 goto count_comp_for; 1792 else if (TYPE(n) == comp_if) { 1793 if (NCH(n) == 3) { 1794 n = CHILD(n, 2); 1795 goto count_comp_iter; 1796 } 1797 else 1798 return n_fors; 1799 } 1800 1801 /* Should never be reached */ 1802 PyErr_SetString(PyExc_SystemError, 1803 "logic error in count_comp_fors"); 1804 return -1; 1805 } 1806 1807 /* Count the number of 'if' statements in a comprehension. 1808 1809 Helper for ast_for_comprehension(). 
1810 */ 1811 1812 static int 1813 count_comp_ifs(struct compiling *c, const node *n) 1814 { 1815 int n_ifs = 0; 1816 1817 while (1) { 1818 REQ(n, comp_iter); 1819 if (TYPE(CHILD(n, 0)) == comp_for) 1820 return n_ifs; 1821 n = CHILD(n, 0); 1822 REQ(n, comp_if); 1823 n_ifs++; 1824 if (NCH(n) == 2) 1825 return n_ifs; 1826 n = CHILD(n, 2); 1827 } 1828 } 1829 1830 static asdl_seq * 1831 ast_for_comprehension(struct compiling *c, const node *n) 1832 { 1833 int i, n_fors; 1834 asdl_seq *comps; 1835 1836 n_fors = count_comp_fors(c, n); 1837 if (n_fors == -1) 1838 return NULL; 1839 1840 comps = _Py_asdl_seq_new(n_fors, c->c_arena); 1841 if (!comps) 1842 return NULL; 1843 1844 for (i = 0; i < n_fors; i++) { 1845 comprehension_ty comp; 1846 asdl_seq *t; 1847 expr_ty expression, first; 1848 node *for_ch; 1849 int is_async = 0; 1850 1851 REQ(n, comp_for); 1852 1853 if (TYPE(CHILD(n, 0)) == ASYNC) { 1854 is_async = 1; 1855 } 1856 1857 for_ch = CHILD(n, 1 + is_async); 1858 t = ast_for_exprlist(c, for_ch, Store); 1859 if (!t) 1860 return NULL; 1861 expression = ast_for_expr(c, CHILD(n, 3 + is_async)); 1862 if (!expression) 1863 return NULL; 1864 1865 /* Check the # of children rather than the length of t, since 1866 (x for x, in ...) has 1 element in t, but still requires a Tuple. 
*/ 1867 first = (expr_ty)asdl_seq_GET(t, 0); 1868 if (NCH(for_ch) == 1) 1869 comp = comprehension(first, expression, NULL, 1870 is_async, c->c_arena); 1871 else 1872 comp = comprehension(Tuple(t, Store, first->lineno, 1873 first->col_offset, c->c_arena), 1874 expression, NULL, is_async, c->c_arena); 1875 if (!comp) 1876 return NULL; 1877 1878 if (NCH(n) == (5 + is_async)) { 1879 int j, n_ifs; 1880 asdl_seq *ifs; 1881 1882 n = CHILD(n, 4 + is_async); 1883 n_ifs = count_comp_ifs(c, n); 1884 if (n_ifs == -1) 1885 return NULL; 1886 1887 ifs = _Py_asdl_seq_new(n_ifs, c->c_arena); 1888 if (!ifs) 1889 return NULL; 1890 1891 for (j = 0; j < n_ifs; j++) { 1892 REQ(n, comp_iter); 1893 n = CHILD(n, 0); 1894 REQ(n, comp_if); 1895 1896 expression = ast_for_expr(c, CHILD(n, 1)); 1897 if (!expression) 1898 return NULL; 1899 asdl_seq_SET(ifs, j, expression); 1900 if (NCH(n) == 3) 1901 n = CHILD(n, 2); 1902 } 1903 /* on exit, must guarantee that n is a comp_for */ 1904 if (TYPE(n) == comp_iter) 1905 n = CHILD(n, 0); 1906 comp->ifs = ifs; 1907 } 1908 asdl_seq_SET(comps, i, comp); 1909 } 1910 return comps; 1911 } 1912 1913 static expr_ty 1914 ast_for_itercomp(struct compiling *c, const node *n, int type) 1915 { 1916 /* testlist_comp: (test|star_expr) 1917 * ( comp_for | (',' (test|star_expr))* [','] ) */ 1918 expr_ty elt; 1919 asdl_seq *comps; 1920 node *ch; 1921 1922 assert(NCH(n) > 1); 1923 1924 ch = CHILD(n, 0); 1925 elt = ast_for_expr(c, ch); 1926 if (!elt) 1927 return NULL; 1928 if (elt->kind == Starred_kind) { 1929 ast_error(c, ch, "iterable unpacking cannot be used in comprehension"); 1930 return NULL; 1931 } 1932 1933 comps = ast_for_comprehension(c, CHILD(n, 1)); 1934 if (!comps) 1935 return NULL; 1936 1937 if (type == COMP_GENEXP) 1938 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1939 else if (type == COMP_LISTCOMP) 1940 return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1941 else if (type == COMP_SETCOMP) 1942 return 
SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1943 else 1944 /* Should never happen */ 1945 return NULL; 1946 } 1947 1948 /* Fills in the key, value pair corresponding to the dict element. In case 1949 * of an unpacking, key is NULL. *i is advanced by the number of ast 1950 * elements. Iff successful, nonzero is returned. 1951 */ 1952 static int 1953 ast_for_dictelement(struct compiling *c, const node *n, int *i, 1954 expr_ty *key, expr_ty *value) 1955 { 1956 expr_ty expression; 1957 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) { 1958 assert(NCH(n) - *i >= 2); 1959 1960 expression = ast_for_expr(c, CHILD(n, *i + 1)); 1961 if (!expression) 1962 return 0; 1963 *key = NULL; 1964 *value = expression; 1965 1966 *i += 2; 1967 } 1968 else { 1969 assert(NCH(n) - *i >= 3); 1970 1971 expression = ast_for_expr(c, CHILD(n, *i)); 1972 if (!expression) 1973 return 0; 1974 *key = expression; 1975 1976 REQ(CHILD(n, *i + 1), COLON); 1977 1978 expression = ast_for_expr(c, CHILD(n, *i + 2)); 1979 if (!expression) 1980 return 0; 1981 *value = expression; 1982 1983 *i += 3; 1984 } 1985 return 1; 1986 } 1987 1988 static expr_ty 1989 ast_for_dictcomp(struct compiling *c, const node *n) 1990 { 1991 expr_ty key, value; 1992 asdl_seq *comps; 1993 int i = 0; 1994 1995 if (!ast_for_dictelement(c, n, &i, &key, &value)) 1996 return NULL; 1997 assert(key); 1998 assert(NCH(n) - i >= 1); 1999 2000 comps = ast_for_comprehension(c, CHILD(n, i)); 2001 if (!comps) 2002 return NULL; 2003 2004 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena); 2005 } 2006 2007 static expr_ty 2008 ast_for_dictdisplay(struct compiling *c, const node *n) 2009 { 2010 int i; 2011 int j; 2012 int size; 2013 asdl_seq *keys, *values; 2014 2015 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */ 2016 keys = _Py_asdl_seq_new(size, c->c_arena); 2017 if (!keys) 2018 return NULL; 2019 2020 values = _Py_asdl_seq_new(size, c->c_arena); 2021 if (!values) 2022 return NULL; 2023 2024 j = 0; 2025 
for (i = 0; i < NCH(n); i++) { 2026 expr_ty key, value; 2027 2028 if (!ast_for_dictelement(c, n, &i, &key, &value)) 2029 return NULL; 2030 asdl_seq_SET(keys, j, key); 2031 asdl_seq_SET(values, j, value); 2032 2033 j++; 2034 } 2035 keys->size = j; 2036 values->size = j; 2037 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena); 2038 } 2039 2040 static expr_ty 2041 ast_for_genexp(struct compiling *c, const node *n) 2042 { 2043 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument)); 2044 return ast_for_itercomp(c, n, COMP_GENEXP); 2045 } 2046 2047 static expr_ty 2048 ast_for_listcomp(struct compiling *c, const node *n) 2049 { 2050 assert(TYPE(n) == (testlist_comp)); 2051 return ast_for_itercomp(c, n, COMP_LISTCOMP); 2052 } 2053 2054 static expr_ty 2055 ast_for_setcomp(struct compiling *c, const node *n) 2056 { 2057 assert(TYPE(n) == (dictorsetmaker)); 2058 return ast_for_itercomp(c, n, COMP_SETCOMP); 2059 } 2060 2061 static expr_ty 2062 ast_for_setdisplay(struct compiling *c, const node *n) 2063 { 2064 int i; 2065 int size; 2066 asdl_seq *elts; 2067 2068 assert(TYPE(n) == (dictorsetmaker)); 2069 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */ 2070 elts = _Py_asdl_seq_new(size, c->c_arena); 2071 if (!elts) 2072 return NULL; 2073 for (i = 0; i < NCH(n); i += 2) { 2074 expr_ty expression; 2075 expression = ast_for_expr(c, CHILD(n, i)); 2076 if (!expression) 2077 return NULL; 2078 asdl_seq_SET(elts, i / 2, expression); 2079 } 2080 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena); 2081 } 2082 2083 static expr_ty 2084 ast_for_atom(struct compiling *c, const node *n) 2085 { 2086 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' 2087 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+ 2088 | '...' 
| 'None' | 'True' | 'False' 2089 */ 2090 node *ch = CHILD(n, 0); 2091 2092 switch (TYPE(ch)) { 2093 case NAME: { 2094 PyObject *name; 2095 const char *s = STR(ch); 2096 size_t len = strlen(s); 2097 if (len >= 4 && len <= 5) { 2098 if (!strcmp(s, "None")) 2099 return NameConstant(Py_None, LINENO(n), n->n_col_offset, c->c_arena); 2100 if (!strcmp(s, "True")) 2101 return NameConstant(Py_True, LINENO(n), n->n_col_offset, c->c_arena); 2102 if (!strcmp(s, "False")) 2103 return NameConstant(Py_False, LINENO(n), n->n_col_offset, c->c_arena); 2104 } 2105 name = new_identifier(s, c); 2106 if (!name) 2107 return NULL; 2108 /* All names start in Load context, but may later be changed. */ 2109 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena); 2110 } 2111 case STRING: { 2112 expr_ty str = parsestrplus(c, n); 2113 if (!str) { 2114 const char *errtype = NULL; 2115 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) 2116 errtype = "unicode error"; 2117 else if (PyErr_ExceptionMatches(PyExc_ValueError)) 2118 errtype = "value error"; 2119 if (errtype) { 2120 char buf[128]; 2121 const char *s = NULL; 2122 PyObject *type, *value, *tback, *errstr; 2123 PyErr_Fetch(&type, &value, &tback); 2124 errstr = PyObject_Str(value); 2125 if (errstr) 2126 s = PyUnicode_AsUTF8(errstr); 2127 if (s) { 2128 PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s); 2129 } else { 2130 PyErr_Clear(); 2131 PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype); 2132 } 2133 Py_XDECREF(errstr); 2134 ast_error(c, n, buf); 2135 Py_DECREF(type); 2136 Py_XDECREF(value); 2137 Py_XDECREF(tback); 2138 } 2139 return NULL; 2140 } 2141 return str; 2142 } 2143 case NUMBER: { 2144 PyObject *pynum = parsenumber(c, STR(ch)); 2145 if (!pynum) 2146 return NULL; 2147 2148 if (PyArena_AddPyObject(c->c_arena, pynum) < 0) { 2149 Py_DECREF(pynum); 2150 return NULL; 2151 } 2152 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena); 2153 } 2154 case ELLIPSIS: /* Ellipsis */ 2155 return Ellipsis(LINENO(n), 
n->n_col_offset, c->c_arena); 2156 case LPAR: /* some parenthesized expressions */ 2157 ch = CHILD(n, 1); 2158 2159 if (TYPE(ch) == RPAR) 2160 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); 2161 2162 if (TYPE(ch) == yield_expr) 2163 return ast_for_expr(c, ch); 2164 2165 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */ 2166 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for)) 2167 return ast_for_genexp(c, ch); 2168 2169 return ast_for_testlist(c, ch); 2170 case LSQB: /* list (or list comprehension) */ 2171 ch = CHILD(n, 1); 2172 2173 if (TYPE(ch) == RSQB) 2174 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); 2175 2176 REQ(ch, testlist_comp); 2177 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { 2178 asdl_seq *elts = seq_for_testlist(c, ch); 2179 if (!elts) 2180 return NULL; 2181 2182 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena); 2183 } 2184 else 2185 return ast_for_listcomp(c, ch); 2186 case LBRACE: { 2187 /* dictorsetmaker: ( ((test ':' test | '**' test) 2188 * (comp_for | (',' (test ':' test | '**' test))* [','])) | 2189 * ((test | '*' test) 2190 * (comp_for | (',' (test | '*' test))* [','])) ) */ 2191 expr_ty res; 2192 ch = CHILD(n, 1); 2193 if (TYPE(ch) == RBRACE) { 2194 /* It's an empty dict. */ 2195 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); 2196 } 2197 else { 2198 int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR); 2199 if (NCH(ch) == 1 || 2200 (NCH(ch) > 1 && 2201 TYPE(CHILD(ch, 1)) == COMMA)) { 2202 /* It's a set display. */ 2203 res = ast_for_setdisplay(c, ch); 2204 } 2205 else if (NCH(ch) > 1 && 2206 TYPE(CHILD(ch, 1)) == comp_for) { 2207 /* It's a set comprehension. */ 2208 res = ast_for_setcomp(c, ch); 2209 } 2210 else if (NCH(ch) > 3 - is_dict && 2211 TYPE(CHILD(ch, 3 - is_dict)) == comp_for) { 2212 /* It's a dictionary comprehension. 
*/ 2213 if (is_dict) { 2214 ast_error(c, n, "dict unpacking cannot be used in " 2215 "dict comprehension"); 2216 return NULL; 2217 } 2218 res = ast_for_dictcomp(c, ch); 2219 } 2220 else { 2221 /* It's a dictionary display. */ 2222 res = ast_for_dictdisplay(c, ch); 2223 } 2224 if (res) { 2225 res->lineno = LINENO(n); 2226 res->col_offset = n->n_col_offset; 2227 } 2228 return res; 2229 } 2230 } 2231 default: 2232 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch)); 2233 return NULL; 2234 } 2235 } 2236 2237 static slice_ty 2238 ast_for_slice(struct compiling *c, const node *n) 2239 { 2240 node *ch; 2241 expr_ty lower = NULL, upper = NULL, step = NULL; 2242 2243 REQ(n, subscript); 2244 2245 /* 2246 subscript: test | [test] ':' [test] [sliceop] 2247 sliceop: ':' [test] 2248 */ 2249 ch = CHILD(n, 0); 2250 if (NCH(n) == 1 && TYPE(ch) == test) { 2251 /* 'step' variable hold no significance in terms of being used over 2252 other vars */ 2253 step = ast_for_expr(c, ch); 2254 if (!step) 2255 return NULL; 2256 2257 return Index(step, c->c_arena); 2258 } 2259 2260 if (TYPE(ch) == test) { 2261 lower = ast_for_expr(c, ch); 2262 if (!lower) 2263 return NULL; 2264 } 2265 2266 /* If there's an upper bound it's in the second or third position. 
*/ 2267 if (TYPE(ch) == COLON) { 2268 if (NCH(n) > 1) { 2269 node *n2 = CHILD(n, 1); 2270 2271 if (TYPE(n2) == test) { 2272 upper = ast_for_expr(c, n2); 2273 if (!upper) 2274 return NULL; 2275 } 2276 } 2277 } else if (NCH(n) > 2) { 2278 node *n2 = CHILD(n, 2); 2279 2280 if (TYPE(n2) == test) { 2281 upper = ast_for_expr(c, n2); 2282 if (!upper) 2283 return NULL; 2284 } 2285 } 2286 2287 ch = CHILD(n, NCH(n) - 1); 2288 if (TYPE(ch) == sliceop) { 2289 if (NCH(ch) != 1) { 2290 ch = CHILD(ch, 1); 2291 if (TYPE(ch) == test) { 2292 step = ast_for_expr(c, ch); 2293 if (!step) 2294 return NULL; 2295 } 2296 } 2297 } 2298 2299 return Slice(lower, upper, step, c->c_arena); 2300 } 2301 2302 static expr_ty 2303 ast_for_binop(struct compiling *c, const node *n) 2304 { 2305 /* Must account for a sequence of expressions. 2306 How should A op B op C by represented? 2307 BinOp(BinOp(A, op, B), op, C). 2308 */ 2309 2310 int i, nops; 2311 expr_ty expr1, expr2, result; 2312 operator_ty newoperator; 2313 2314 expr1 = ast_for_expr(c, CHILD(n, 0)); 2315 if (!expr1) 2316 return NULL; 2317 2318 expr2 = ast_for_expr(c, CHILD(n, 2)); 2319 if (!expr2) 2320 return NULL; 2321 2322 newoperator = get_operator(CHILD(n, 1)); 2323 if (!newoperator) 2324 return NULL; 2325 2326 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, 2327 c->c_arena); 2328 if (!result) 2329 return NULL; 2330 2331 nops = (NCH(n) - 1) / 2; 2332 for (i = 1; i < nops; i++) { 2333 expr_ty tmp_result, tmp; 2334 const node* next_oper = CHILD(n, i * 2 + 1); 2335 2336 newoperator = get_operator(next_oper); 2337 if (!newoperator) 2338 return NULL; 2339 2340 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2)); 2341 if (!tmp) 2342 return NULL; 2343 2344 tmp_result = BinOp(result, newoperator, tmp, 2345 LINENO(next_oper), next_oper->n_col_offset, 2346 c->c_arena); 2347 if (!tmp_result) 2348 return NULL; 2349 result = tmp_result; 2350 } 2351 return result; 2352 } 2353 2354 static expr_ty 2355 ast_for_trailer(struct compiling *c, 
const node *n, expr_ty left_expr) 2356 { 2357 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 2358 subscriptlist: subscript (',' subscript)* [','] 2359 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] 2360 */ 2361 REQ(n, trailer); 2362 if (TYPE(CHILD(n, 0)) == LPAR) { 2363 if (NCH(n) == 2) 2364 return Call(left_expr, NULL, NULL, LINENO(n), 2365 n->n_col_offset, c->c_arena); 2366 else 2367 return ast_for_call(c, CHILD(n, 1), left_expr); 2368 } 2369 else if (TYPE(CHILD(n, 0)) == DOT) { 2370 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1)); 2371 if (!attr_id) 2372 return NULL; 2373 return Attribute(left_expr, attr_id, Load, 2374 LINENO(n), n->n_col_offset, c->c_arena); 2375 } 2376 else { 2377 REQ(CHILD(n, 0), LSQB); 2378 REQ(CHILD(n, 2), RSQB); 2379 n = CHILD(n, 1); 2380 if (NCH(n) == 1) { 2381 slice_ty slc = ast_for_slice(c, CHILD(n, 0)); 2382 if (!slc) 2383 return NULL; 2384 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset, 2385 c->c_arena); 2386 } 2387 else { 2388 /* The grammar is ambiguous here. The ambiguity is resolved 2389 by treating the sequence as a tuple literal if there are 2390 no slice features. 
2391 */ 2392 int j; 2393 slice_ty slc; 2394 expr_ty e; 2395 int simple = 1; 2396 asdl_seq *slices, *elts; 2397 slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 2398 if (!slices) 2399 return NULL; 2400 for (j = 0; j < NCH(n); j += 2) { 2401 slc = ast_for_slice(c, CHILD(n, j)); 2402 if (!slc) 2403 return NULL; 2404 if (slc->kind != Index_kind) 2405 simple = 0; 2406 asdl_seq_SET(slices, j / 2, slc); 2407 } 2408 if (!simple) { 2409 return Subscript(left_expr, ExtSlice(slices, c->c_arena), 2410 Load, LINENO(n), n->n_col_offset, c->c_arena); 2411 } 2412 /* extract Index values and put them in a Tuple */ 2413 elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena); 2414 if (!elts) 2415 return NULL; 2416 for (j = 0; j < asdl_seq_LEN(slices); ++j) { 2417 slc = (slice_ty)asdl_seq_GET(slices, j); 2418 assert(slc->kind == Index_kind && slc->v.Index.value); 2419 asdl_seq_SET(elts, j, slc->v.Index.value); 2420 } 2421 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena); 2422 if (!e) 2423 return NULL; 2424 return Subscript(left_expr, Index(e, c->c_arena), 2425 Load, LINENO(n), n->n_col_offset, c->c_arena); 2426 } 2427 } 2428 } 2429 2430 static expr_ty 2431 ast_for_factor(struct compiling *c, const node *n) 2432 { 2433 expr_ty expression; 2434 2435 expression = ast_for_expr(c, CHILD(n, 1)); 2436 if (!expression) 2437 return NULL; 2438 2439 switch (TYPE(CHILD(n, 0))) { 2440 case PLUS: 2441 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, 2442 c->c_arena); 2443 case MINUS: 2444 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, 2445 c->c_arena); 2446 case TILDE: 2447 return UnaryOp(Invert, expression, LINENO(n), 2448 n->n_col_offset, c->c_arena); 2449 } 2450 PyErr_Format(PyExc_SystemError, "unhandled factor: %d", 2451 TYPE(CHILD(n, 0))); 2452 return NULL; 2453 } 2454 2455 static expr_ty 2456 ast_for_atom_expr(struct compiling *c, const node *n) 2457 { 2458 int i, nch, start = 0; 2459 expr_ty e, tmp; 2460 2461 REQ(n, atom_expr); 2462 nch = 
NCH(n); 2463 2464 if (TYPE(CHILD(n, 0)) == AWAIT) { 2465 start = 1; 2466 assert(nch > 1); 2467 } 2468 2469 e = ast_for_atom(c, CHILD(n, start)); 2470 if (!e) 2471 return NULL; 2472 if (nch == 1) 2473 return e; 2474 if (start && nch == 2) { 2475 return Await(e, LINENO(n), n->n_col_offset, c->c_arena); 2476 } 2477 2478 for (i = start + 1; i < nch; i++) { 2479 node *ch = CHILD(n, i); 2480 if (TYPE(ch) != trailer) 2481 break; 2482 tmp = ast_for_trailer(c, ch, e); 2483 if (!tmp) 2484 return NULL; 2485 tmp->lineno = e->lineno; 2486 tmp->col_offset = e->col_offset; 2487 e = tmp; 2488 } 2489 2490 if (start) { 2491 /* there was an AWAIT */ 2492 return Await(e, LINENO(n), n->n_col_offset, c->c_arena); 2493 } 2494 else { 2495 return e; 2496 } 2497 } 2498 2499 static expr_ty 2500 ast_for_power(struct compiling *c, const node *n) 2501 { 2502 /* power: atom trailer* ('**' factor)* 2503 */ 2504 expr_ty e; 2505 REQ(n, power); 2506 e = ast_for_atom_expr(c, CHILD(n, 0)); 2507 if (!e) 2508 return NULL; 2509 if (NCH(n) == 1) 2510 return e; 2511 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) { 2512 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); 2513 if (!f) 2514 return NULL; 2515 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena); 2516 } 2517 return e; 2518 } 2519 2520 static expr_ty 2521 ast_for_starred(struct compiling *c, const node *n) 2522 { 2523 expr_ty tmp; 2524 REQ(n, star_expr); 2525 2526 tmp = ast_for_expr(c, CHILD(n, 1)); 2527 if (!tmp) 2528 return NULL; 2529 2530 /* The Load context is changed later. */ 2531 return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); 2532 } 2533 2534 2535 /* Do not name a variable 'expr'! Will cause a compile error. 
2536 */ 2537 2538 static expr_ty 2539 ast_for_expr(struct compiling *c, const node *n) 2540 { 2541 /* handle the full range of simple expressions 2542 test: or_test ['if' or_test 'else' test] | lambdef 2543 test_nocond: or_test | lambdef_nocond 2544 or_test: and_test ('or' and_test)* 2545 and_test: not_test ('and' not_test)* 2546 not_test: 'not' not_test | comparison 2547 comparison: expr (comp_op expr)* 2548 expr: xor_expr ('|' xor_expr)* 2549 xor_expr: and_expr ('^' and_expr)* 2550 and_expr: shift_expr ('&' shift_expr)* 2551 shift_expr: arith_expr (('<<'|'>>') arith_expr)* 2552 arith_expr: term (('+'|'-') term)* 2553 term: factor (('*'|'@'|'/'|'%'|'//') factor)* 2554 factor: ('+'|'-'|'~') factor | power 2555 power: atom_expr ['**' factor] 2556 atom_expr: [AWAIT] atom trailer* 2557 yield_expr: 'yield' [yield_arg] 2558 */ 2559 2560 asdl_seq *seq; 2561 int i; 2562 2563 loop: 2564 switch (TYPE(n)) { 2565 case test: 2566 case test_nocond: 2567 if (TYPE(CHILD(n, 0)) == lambdef || 2568 TYPE(CHILD(n, 0)) == lambdef_nocond) 2569 return ast_for_lambdef(c, CHILD(n, 0)); 2570 else if (NCH(n) > 1) 2571 return ast_for_ifexpr(c, n); 2572 /* Fallthrough */ 2573 case or_test: 2574 case and_test: 2575 if (NCH(n) == 1) { 2576 n = CHILD(n, 0); 2577 goto loop; 2578 } 2579 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 2580 if (!seq) 2581 return NULL; 2582 for (i = 0; i < NCH(n); i += 2) { 2583 expr_ty e = ast_for_expr(c, CHILD(n, i)); 2584 if (!e) 2585 return NULL; 2586 asdl_seq_SET(seq, i / 2, e); 2587 } 2588 if (!strcmp(STR(CHILD(n, 1)), "and")) 2589 return BoolOp(And, seq, LINENO(n), n->n_col_offset, 2590 c->c_arena); 2591 assert(!strcmp(STR(CHILD(n, 1)), "or")); 2592 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena); 2593 case not_test: 2594 if (NCH(n) == 1) { 2595 n = CHILD(n, 0); 2596 goto loop; 2597 } 2598 else { 2599 expr_ty expression = ast_for_expr(c, CHILD(n, 1)); 2600 if (!expression) 2601 return NULL; 2602 2603 return UnaryOp(Not, expression, 
LINENO(n), n->n_col_offset, 2604 c->c_arena); 2605 } 2606 case comparison: 2607 if (NCH(n) == 1) { 2608 n = CHILD(n, 0); 2609 goto loop; 2610 } 2611 else { 2612 expr_ty expression; 2613 asdl_int_seq *ops; 2614 asdl_seq *cmps; 2615 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena); 2616 if (!ops) 2617 return NULL; 2618 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); 2619 if (!cmps) { 2620 return NULL; 2621 } 2622 for (i = 1; i < NCH(n); i += 2) { 2623 cmpop_ty newoperator; 2624 2625 newoperator = ast_for_comp_op(c, CHILD(n, i)); 2626 if (!newoperator) { 2627 return NULL; 2628 } 2629 2630 expression = ast_for_expr(c, CHILD(n, i + 1)); 2631 if (!expression) { 2632 return NULL; 2633 } 2634 2635 asdl_seq_SET(ops, i / 2, newoperator); 2636 asdl_seq_SET(cmps, i / 2, expression); 2637 } 2638 expression = ast_for_expr(c, CHILD(n, 0)); 2639 if (!expression) { 2640 return NULL; 2641 } 2642 2643 return Compare(expression, ops, cmps, LINENO(n), 2644 n->n_col_offset, c->c_arena); 2645 } 2646 break; 2647 2648 case star_expr: 2649 return ast_for_starred(c, n); 2650 /* The next five cases all handle BinOps. The main body of code 2651 is the same in each case, but the switch turned inside out to 2652 reuse the code for each type of operator. 
2653 */ 2654 case expr: 2655 case xor_expr: 2656 case and_expr: 2657 case shift_expr: 2658 case arith_expr: 2659 case term: 2660 if (NCH(n) == 1) { 2661 n = CHILD(n, 0); 2662 goto loop; 2663 } 2664 return ast_for_binop(c, n); 2665 case yield_expr: { 2666 node *an = NULL; 2667 node *en = NULL; 2668 int is_from = 0; 2669 expr_ty exp = NULL; 2670 if (NCH(n) > 1) 2671 an = CHILD(n, 1); /* yield_arg */ 2672 if (an) { 2673 en = CHILD(an, NCH(an) - 1); 2674 if (NCH(an) == 2) { 2675 is_from = 1; 2676 exp = ast_for_expr(c, en); 2677 } 2678 else 2679 exp = ast_for_testlist(c, en); 2680 if (!exp) 2681 return NULL; 2682 } 2683 if (is_from) 2684 return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena); 2685 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena); 2686 } 2687 case factor: 2688 if (NCH(n) == 1) { 2689 n = CHILD(n, 0); 2690 goto loop; 2691 } 2692 return ast_for_factor(c, n); 2693 case power: 2694 return ast_for_power(c, n); 2695 default: 2696 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n)); 2697 return NULL; 2698 } 2699 /* should never get here unless if error is set */ 2700 return NULL; 2701 } 2702 2703 static expr_ty 2704 ast_for_call(struct compiling *c, const node *n, expr_ty func) 2705 { 2706 /* 2707 arglist: argument (',' argument)* [','] 2708 argument: ( test [comp_for] | '*' test | test '=' test | '**' test ) 2709 */ 2710 2711 int i, nargs, nkeywords, ngens; 2712 int ndoublestars; 2713 asdl_seq *args; 2714 asdl_seq *keywords; 2715 2716 REQ(n, arglist); 2717 2718 nargs = 0; 2719 nkeywords = 0; 2720 ngens = 0; 2721 for (i = 0; i < NCH(n); i++) { 2722 node *ch = CHILD(n, i); 2723 if (TYPE(ch) == argument) { 2724 if (NCH(ch) == 1) 2725 nargs++; 2726 else if (TYPE(CHILD(ch, 1)) == comp_for) 2727 ngens++; 2728 else if (TYPE(CHILD(ch, 0)) == STAR) 2729 nargs++; 2730 else 2731 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */ 2732 nkeywords++; 2733 } 2734 } 2735 if (ngens > 1 || (ngens && (nargs || nkeywords))) { 2736 ast_error(c, n, 
"Generator expression must be parenthesized " 2737 "if not sole argument"); 2738 return NULL; 2739 } 2740 2741 if (nargs + nkeywords + ngens > 255) { 2742 ast_error(c, n, "more than 255 arguments"); 2743 return NULL; 2744 } 2745 2746 args = _Py_asdl_seq_new(nargs + ngens, c->c_arena); 2747 if (!args) 2748 return NULL; 2749 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena); 2750 if (!keywords) 2751 return NULL; 2752 2753 nargs = 0; /* positional arguments + iterable argument unpackings */ 2754 nkeywords = 0; /* keyword arguments + keyword argument unpackings */ 2755 ndoublestars = 0; /* just keyword argument unpackings */ 2756 for (i = 0; i < NCH(n); i++) { 2757 node *ch = CHILD(n, i); 2758 if (TYPE(ch) == argument) { 2759 expr_ty e; 2760 node *chch = CHILD(ch, 0); 2761 if (NCH(ch) == 1) { 2762 /* a positional argument */ 2763 if (nkeywords) { 2764 if (ndoublestars) { 2765 ast_error(c, chch, 2766 "positional argument follows " 2767 "keyword argument unpacking"); 2768 } 2769 else { 2770 ast_error(c, chch, 2771 "positional argument follows " 2772 "keyword argument"); 2773 } 2774 return NULL; 2775 } 2776 e = ast_for_expr(c, chch); 2777 if (!e) 2778 return NULL; 2779 asdl_seq_SET(args, nargs++, e); 2780 } 2781 else if (TYPE(chch) == STAR) { 2782 /* an iterable argument unpacking */ 2783 expr_ty starred; 2784 if (ndoublestars) { 2785 ast_error(c, chch, 2786 "iterable argument unpacking follows " 2787 "keyword argument unpacking"); 2788 return NULL; 2789 } 2790 e = ast_for_expr(c, CHILD(ch, 1)); 2791 if (!e) 2792 return NULL; 2793 starred = Starred(e, Load, LINENO(chch), 2794 chch->n_col_offset, 2795 c->c_arena); 2796 if (!starred) 2797 return NULL; 2798 asdl_seq_SET(args, nargs++, starred); 2799 2800 } 2801 else if (TYPE(chch) == DOUBLESTAR) { 2802 /* a keyword argument unpacking */ 2803 keyword_ty kw; 2804 i++; 2805 e = ast_for_expr(c, CHILD(ch, 1)); 2806 if (!e) 2807 return NULL; 2808 kw = keyword(NULL, e, c->c_arena); 2809 asdl_seq_SET(keywords, nkeywords++, kw); 
2810 ndoublestars++; 2811 } 2812 else if (TYPE(CHILD(ch, 1)) == comp_for) { 2813 /* the lone generator expression */ 2814 e = ast_for_genexp(c, ch); 2815 if (!e) 2816 return NULL; 2817 asdl_seq_SET(args, nargs++, e); 2818 } 2819 else { 2820 /* a keyword argument */ 2821 keyword_ty kw; 2822 identifier key, tmp; 2823 int k; 2824 2825 /* chch is test, but must be an identifier? */ 2826 e = ast_for_expr(c, chch); 2827 if (!e) 2828 return NULL; 2829 /* f(lambda x: x[0] = 3) ends up getting parsed with 2830 * LHS test = lambda x: x[0], and RHS test = 3. 2831 * SF bug 132313 points out that complaining about a keyword 2832 * then is very confusing. 2833 */ 2834 if (e->kind == Lambda_kind) { 2835 ast_error(c, chch, 2836 "lambda cannot contain assignment"); 2837 return NULL; 2838 } 2839 else if (e->kind != Name_kind) { 2840 ast_error(c, chch, 2841 "keyword can't be an expression"); 2842 return NULL; 2843 } 2844 else if (forbidden_name(c, e->v.Name.id, ch, 1)) { 2845 return NULL; 2846 } 2847 key = e->v.Name.id; 2848 for (k = 0; k < nkeywords; k++) { 2849 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg; 2850 if (tmp && !PyUnicode_Compare(tmp, key)) { 2851 ast_error(c, chch, 2852 "keyword argument repeated"); 2853 return NULL; 2854 } 2855 } 2856 e = ast_for_expr(c, CHILD(ch, 2)); 2857 if (!e) 2858 return NULL; 2859 kw = keyword(key, e, c->c_arena); 2860 if (!kw) 2861 return NULL; 2862 asdl_seq_SET(keywords, nkeywords++, kw); 2863 } 2864 } 2865 } 2866 2867 return Call(func, args, keywords, func->lineno, func->col_offset, c->c_arena); 2868 } 2869 2870 static expr_ty 2871 ast_for_testlist(struct compiling *c, const node* n) 2872 { 2873 /* testlist_comp: test (comp_for | (',' test)* [',']) */ 2874 /* testlist: test (',' test)* [','] */ 2875 assert(NCH(n) > 0); 2876 if (TYPE(n) == testlist_comp) { 2877 if (NCH(n) > 1) 2878 assert(TYPE(CHILD(n, 1)) != comp_for); 2879 } 2880 else { 2881 assert(TYPE(n) == testlist || 2882 TYPE(n) == testlist_star_expr); 2883 } 2884 if (NCH(n) == 1) 
2885 return ast_for_expr(c, CHILD(n, 0)); 2886 else { 2887 asdl_seq *tmp = seq_for_testlist(c, n); 2888 if (!tmp) 2889 return NULL; 2890 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); 2891 } 2892 } 2893 2894 static stmt_ty 2895 ast_for_expr_stmt(struct compiling *c, const node *n) 2896 { 2897 REQ(n, expr_stmt); 2898 /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | 2899 ('=' (yield_expr|testlist_star_expr))*) 2900 annassign: ':' test ['=' test] 2901 testlist_star_expr: (test|star_expr) (',' test|star_expr)* [','] 2902 augassign: '+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' 2903 | '<<=' | '>>=' | '**=' | '//=' 2904 test: ... here starts the operator precedence dance 2905 */ 2906 2907 if (NCH(n) == 1) { 2908 expr_ty e = ast_for_testlist(c, CHILD(n, 0)); 2909 if (!e) 2910 return NULL; 2911 2912 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena); 2913 } 2914 else if (TYPE(CHILD(n, 1)) == augassign) { 2915 expr_ty expr1, expr2; 2916 operator_ty newoperator; 2917 node *ch = CHILD(n, 0); 2918 2919 expr1 = ast_for_testlist(c, ch); 2920 if (!expr1) 2921 return NULL; 2922 if(!set_context(c, expr1, Store, ch)) 2923 return NULL; 2924 /* set_context checks that most expressions are not the left side. 2925 Augmented assignments can only have a name, a subscript, or an 2926 attribute on the left, though, so we have to explicitly check for 2927 those. 
*/ 2928 switch (expr1->kind) { 2929 case Name_kind: 2930 case Attribute_kind: 2931 case Subscript_kind: 2932 break; 2933 default: 2934 ast_error(c, ch, "illegal expression for augmented assignment"); 2935 return NULL; 2936 } 2937 2938 ch = CHILD(n, 2); 2939 if (TYPE(ch) == testlist) 2940 expr2 = ast_for_testlist(c, ch); 2941 else 2942 expr2 = ast_for_expr(c, ch); 2943 if (!expr2) 2944 return NULL; 2945 2946 newoperator = ast_for_augassign(c, CHILD(n, 1)); 2947 if (!newoperator) 2948 return NULL; 2949 2950 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena); 2951 } 2952 else if (TYPE(CHILD(n, 1)) == annassign) { 2953 expr_ty expr1, expr2, expr3; 2954 node *ch = CHILD(n, 0); 2955 node *deep, *ann = CHILD(n, 1); 2956 int simple = 1; 2957 2958 /* we keep track of parens to qualify (x) as expression not name */ 2959 deep = ch; 2960 while (NCH(deep) == 1) { 2961 deep = CHILD(deep, 0); 2962 } 2963 if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) { 2964 simple = 0; 2965 } 2966 expr1 = ast_for_testlist(c, ch); 2967 if (!expr1) { 2968 return NULL; 2969 } 2970 switch (expr1->kind) { 2971 case Name_kind: 2972 if (forbidden_name(c, expr1->v.Name.id, n, 0)) { 2973 return NULL; 2974 } 2975 expr1->v.Name.ctx = Store; 2976 break; 2977 case Attribute_kind: 2978 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) { 2979 return NULL; 2980 } 2981 expr1->v.Attribute.ctx = Store; 2982 break; 2983 case Subscript_kind: 2984 expr1->v.Subscript.ctx = Store; 2985 break; 2986 case List_kind: 2987 ast_error(c, ch, 2988 "only single target (not list) can be annotated"); 2989 return NULL; 2990 case Tuple_kind: 2991 ast_error(c, ch, 2992 "only single target (not tuple) can be annotated"); 2993 return NULL; 2994 default: 2995 ast_error(c, ch, 2996 "illegal target for annotation"); 2997 return NULL; 2998 } 2999 3000 if (expr1->kind != Name_kind) { 3001 simple = 0; 3002 } 3003 ch = CHILD(ann, 1); 3004 expr2 = ast_for_expr(c, ch); 3005 if (!expr2) { 3006 return 
NULL; 3007 } 3008 if (NCH(ann) == 2) { 3009 return AnnAssign(expr1, expr2, NULL, simple, 3010 LINENO(n), n->n_col_offset, c->c_arena); 3011 } 3012 else { 3013 ch = CHILD(ann, 3); 3014 expr3 = ast_for_expr(c, ch); 3015 if (!expr3) { 3016 return NULL; 3017 } 3018 return AnnAssign(expr1, expr2, expr3, simple, 3019 LINENO(n), n->n_col_offset, c->c_arena); 3020 } 3021 } 3022 else { 3023 int i; 3024 asdl_seq *targets; 3025 node *value; 3026 expr_ty expression; 3027 3028 /* a normal assignment */ 3029 REQ(CHILD(n, 1), EQUAL); 3030 targets = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); 3031 if (!targets) 3032 return NULL; 3033 for (i = 0; i < NCH(n) - 2; i += 2) { 3034 expr_ty e; 3035 node *ch = CHILD(n, i); 3036 if (TYPE(ch) == yield_expr) { 3037 ast_error(c, ch, "assignment to yield expression not possible"); 3038 return NULL; 3039 } 3040 e = ast_for_testlist(c, ch); 3041 if (!e) 3042 return NULL; 3043 3044 /* set context to assign */ 3045 if (!set_context(c, e, Store, CHILD(n, i))) 3046 return NULL; 3047 3048 asdl_seq_SET(targets, i / 2, e); 3049 } 3050 value = CHILD(n, NCH(n) - 1); 3051 if (TYPE(value) == testlist_star_expr) 3052 expression = ast_for_testlist(c, value); 3053 else 3054 expression = ast_for_expr(c, value); 3055 if (!expression) 3056 return NULL; 3057 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena); 3058 } 3059 } 3060 3061 3062 static asdl_seq * 3063 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context) 3064 { 3065 asdl_seq *seq; 3066 int i; 3067 expr_ty e; 3068 3069 REQ(n, exprlist); 3070 3071 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 3072 if (!seq) 3073 return NULL; 3074 for (i = 0; i < NCH(n); i += 2) { 3075 e = ast_for_expr(c, CHILD(n, i)); 3076 if (!e) 3077 return NULL; 3078 asdl_seq_SET(seq, i / 2, e); 3079 if (context && !set_context(c, e, context, CHILD(n, i))) 3080 return NULL; 3081 } 3082 return seq; 3083 } 3084 3085 static stmt_ty 3086 ast_for_del_stmt(struct compiling *c, const node 
*n) 3087 { 3088 asdl_seq *expr_list; 3089 3090 /* del_stmt: 'del' exprlist */ 3091 REQ(n, del_stmt); 3092 3093 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del); 3094 if (!expr_list) 3095 return NULL; 3096 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena); 3097 } 3098 3099 static stmt_ty 3100 ast_for_flow_stmt(struct compiling *c, const node *n) 3101 { 3102 /* 3103 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt 3104 | yield_stmt 3105 break_stmt: 'break' 3106 continue_stmt: 'continue' 3107 return_stmt: 'return' [testlist] 3108 yield_stmt: yield_expr 3109 yield_expr: 'yield' testlist | 'yield' 'from' test 3110 raise_stmt: 'raise' [test [',' test [',' test]]] 3111 */ 3112 node *ch; 3113 3114 REQ(n, flow_stmt); 3115 ch = CHILD(n, 0); 3116 switch (TYPE(ch)) { 3117 case break_stmt: 3118 return Break(LINENO(n), n->n_col_offset, c->c_arena); 3119 case continue_stmt: 3120 return Continue(LINENO(n), n->n_col_offset, c->c_arena); 3121 case yield_stmt: { /* will reduce to yield_expr */ 3122 expr_ty exp = ast_for_expr(c, CHILD(ch, 0)); 3123 if (!exp) 3124 return NULL; 3125 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena); 3126 } 3127 case return_stmt: 3128 if (NCH(ch) == 1) 3129 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena); 3130 else { 3131 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1)); 3132 if (!expression) 3133 return NULL; 3134 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena); 3135 } 3136 case raise_stmt: 3137 if (NCH(ch) == 1) 3138 return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); 3139 else if (NCH(ch) >= 2) { 3140 expr_ty cause = NULL; 3141 expr_ty expression = ast_for_expr(c, CHILD(ch, 1)); 3142 if (!expression) 3143 return NULL; 3144 if (NCH(ch) == 4) { 3145 cause = ast_for_expr(c, CHILD(ch, 3)); 3146 if (!cause) 3147 return NULL; 3148 } 3149 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena); 3150 } 3151 default: 3152 PyErr_Format(PyExc_SystemError, 
3153 "unexpected flow_stmt: %d", TYPE(ch)); 3154 return NULL; 3155 } 3156 } 3157 3158 static alias_ty 3159 alias_for_import_name(struct compiling *c, const node *n, int store) 3160 { 3161 /* 3162 import_as_name: NAME ['as' NAME] 3163 dotted_as_name: dotted_name ['as' NAME] 3164 dotted_name: NAME ('.' NAME)* 3165 */ 3166 identifier str, name; 3167 3168 loop: 3169 switch (TYPE(n)) { 3170 case import_as_name: { 3171 node *name_node = CHILD(n, 0); 3172 str = NULL; 3173 name = NEW_IDENTIFIER(name_node); 3174 if (!name) 3175 return NULL; 3176 if (NCH(n) == 3) { 3177 node *str_node = CHILD(n, 2); 3178 str = NEW_IDENTIFIER(str_node); 3179 if (!str) 3180 return NULL; 3181 if (store && forbidden_name(c, str, str_node, 0)) 3182 return NULL; 3183 } 3184 else { 3185 if (forbidden_name(c, name, name_node, 0)) 3186 return NULL; 3187 } 3188 return alias(name, str, c->c_arena); 3189 } 3190 case dotted_as_name: 3191 if (NCH(n) == 1) { 3192 n = CHILD(n, 0); 3193 goto loop; 3194 } 3195 else { 3196 node *asname_node = CHILD(n, 2); 3197 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0); 3198 if (!a) 3199 return NULL; 3200 assert(!a->asname); 3201 a->asname = NEW_IDENTIFIER(asname_node); 3202 if (!a->asname) 3203 return NULL; 3204 if (forbidden_name(c, a->asname, asname_node, 0)) 3205 return NULL; 3206 return a; 3207 } 3208 break; 3209 case dotted_name: 3210 if (NCH(n) == 1) { 3211 node *name_node = CHILD(n, 0); 3212 name = NEW_IDENTIFIER(name_node); 3213 if (!name) 3214 return NULL; 3215 if (store && forbidden_name(c, name, name_node, 0)) 3216 return NULL; 3217 return alias(name, NULL, c->c_arena); 3218 } 3219 else { 3220 /* Create a string of the form "a.b.c" */ 3221 int i; 3222 size_t len; 3223 char *s; 3224 PyObject *uni; 3225 3226 len = 0; 3227 for (i = 0; i < NCH(n); i += 2) 3228 /* length of string plus one for the dot */ 3229 len += strlen(STR(CHILD(n, i))) + 1; 3230 len--; /* the last name doesn't have a dot */ 3231 str = PyBytes_FromStringAndSize(NULL, len); 3232 if (!str) 
3233 return NULL; 3234 s = PyBytes_AS_STRING(str); 3235 if (!s) 3236 return NULL; 3237 for (i = 0; i < NCH(n); i += 2) { 3238 char *sch = STR(CHILD(n, i)); 3239 strcpy(s, STR(CHILD(n, i))); 3240 s += strlen(sch); 3241 *s++ = '.'; 3242 } 3243 --s; 3244 *s = '\0'; 3245 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), 3246 PyBytes_GET_SIZE(str), 3247 NULL); 3248 Py_DECREF(str); 3249 if (!uni) 3250 return NULL; 3251 str = uni; 3252 PyUnicode_InternInPlace(&str); 3253 if (PyArena_AddPyObject(c->c_arena, str) < 0) { 3254 Py_DECREF(str); 3255 return NULL; 3256 } 3257 return alias(str, NULL, c->c_arena); 3258 } 3259 break; 3260 case STAR: 3261 str = PyUnicode_InternFromString("*"); 3262 if (PyArena_AddPyObject(c->c_arena, str) < 0) { 3263 Py_DECREF(str); 3264 return NULL; 3265 } 3266 return alias(str, NULL, c->c_arena); 3267 default: 3268 PyErr_Format(PyExc_SystemError, 3269 "unexpected import name: %d", TYPE(n)); 3270 return NULL; 3271 } 3272 3273 PyErr_SetString(PyExc_SystemError, "unhandled import name condition"); 3274 return NULL; 3275 } 3276 3277 static stmt_ty 3278 ast_for_import_stmt(struct compiling *c, const node *n) 3279 { 3280 /* 3281 import_stmt: import_name | import_from 3282 import_name: 'import' dotted_as_names 3283 import_from: 'from' (('.' | '...')* dotted_name | ('.' 
| '...')+) 3284 'import' ('*' | '(' import_as_names ')' | import_as_names) 3285 */ 3286 int lineno; 3287 int col_offset; 3288 int i; 3289 asdl_seq *aliases; 3290 3291 REQ(n, import_stmt); 3292 lineno = LINENO(n); 3293 col_offset = n->n_col_offset; 3294 n = CHILD(n, 0); 3295 if (TYPE(n) == import_name) { 3296 n = CHILD(n, 1); 3297 REQ(n, dotted_as_names); 3298 aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 3299 if (!aliases) 3300 return NULL; 3301 for (i = 0; i < NCH(n); i += 2) { 3302 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); 3303 if (!import_alias) 3304 return NULL; 3305 asdl_seq_SET(aliases, i / 2, import_alias); 3306 } 3307 return Import(aliases, lineno, col_offset, c->c_arena); 3308 } 3309 else if (TYPE(n) == import_from) { 3310 int n_children; 3311 int idx, ndots = 0; 3312 alias_ty mod = NULL; 3313 identifier modname = NULL; 3314 3315 /* Count the number of dots (for relative imports) and check for the 3316 optional module name */ 3317 for (idx = 1; idx < NCH(n); idx++) { 3318 if (TYPE(CHILD(n, idx)) == dotted_name) { 3319 mod = alias_for_import_name(c, CHILD(n, idx), 0); 3320 if (!mod) 3321 return NULL; 3322 idx++; 3323 break; 3324 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) { 3325 /* three consecutive dots are tokenized as one ELLIPSIS */ 3326 ndots += 3; 3327 continue; 3328 } else if (TYPE(CHILD(n, idx)) != DOT) { 3329 break; 3330 } 3331 ndots++; 3332 } 3333 idx++; /* skip over the 'import' keyword */ 3334 switch (TYPE(CHILD(n, idx))) { 3335 case STAR: 3336 /* from ... import * */ 3337 n = CHILD(n, idx); 3338 n_children = 1; 3339 break; 3340 case LPAR: 3341 /* from ... import (x, y, z) */ 3342 n = CHILD(n, idx + 1); 3343 n_children = NCH(n); 3344 break; 3345 case import_as_names: 3346 /* from ... 
import x, y, z */ 3347 n = CHILD(n, idx); 3348 n_children = NCH(n); 3349 if (n_children % 2 == 0) { 3350 ast_error(c, n, "trailing comma not allowed without" 3351 " surrounding parentheses"); 3352 return NULL; 3353 } 3354 break; 3355 default: 3356 ast_error(c, n, "Unexpected node-type in from-import"); 3357 return NULL; 3358 } 3359 3360 aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena); 3361 if (!aliases) 3362 return NULL; 3363 3364 /* handle "from ... import *" special b/c there's no children */ 3365 if (TYPE(n) == STAR) { 3366 alias_ty import_alias = alias_for_import_name(c, n, 1); 3367 if (!import_alias) 3368 return NULL; 3369 asdl_seq_SET(aliases, 0, import_alias); 3370 } 3371 else { 3372 for (i = 0; i < NCH(n); i += 2) { 3373 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); 3374 if (!import_alias) 3375 return NULL; 3376 asdl_seq_SET(aliases, i / 2, import_alias); 3377 } 3378 } 3379 if (mod != NULL) 3380 modname = mod->name; 3381 return ImportFrom(modname, aliases, ndots, lineno, col_offset, 3382 c->c_arena); 3383 } 3384 PyErr_Format(PyExc_SystemError, 3385 "unknown import statement: starts with command '%s'", 3386 STR(CHILD(n, 0))); 3387 return NULL; 3388 } 3389 3390 static stmt_ty 3391 ast_for_global_stmt(struct compiling *c, const node *n) 3392 { 3393 /* global_stmt: 'global' NAME (',' NAME)* */ 3394 identifier name; 3395 asdl_seq *s; 3396 int i; 3397 3398 REQ(n, global_stmt); 3399 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); 3400 if (!s) 3401 return NULL; 3402 for (i = 1; i < NCH(n); i += 2) { 3403 name = NEW_IDENTIFIER(CHILD(n, i)); 3404 if (!name) 3405 return NULL; 3406 asdl_seq_SET(s, i / 2, name); 3407 } 3408 return Global(s, LINENO(n), n->n_col_offset, c->c_arena); 3409 } 3410 3411 static stmt_ty 3412 ast_for_nonlocal_stmt(struct compiling *c, const node *n) 3413 { 3414 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */ 3415 identifier name; 3416 asdl_seq *s; 3417 int i; 3418 3419 REQ(n, nonlocal_stmt); 3420 s = 
_Py_asdl_seq_new(NCH(n) / 2, c->c_arena); 3421 if (!s) 3422 return NULL; 3423 for (i = 1; i < NCH(n); i += 2) { 3424 name = NEW_IDENTIFIER(CHILD(n, i)); 3425 if (!name) 3426 return NULL; 3427 asdl_seq_SET(s, i / 2, name); 3428 } 3429 return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena); 3430 } 3431 3432 static stmt_ty 3433 ast_for_assert_stmt(struct compiling *c, const node *n) 3434 { 3435 /* assert_stmt: 'assert' test [',' test] */ 3436 REQ(n, assert_stmt); 3437 if (NCH(n) == 2) { 3438 expr_ty expression = ast_for_expr(c, CHILD(n, 1)); 3439 if (!expression) 3440 return NULL; 3441 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena); 3442 } 3443 else if (NCH(n) == 4) { 3444 expr_ty expr1, expr2; 3445 3446 expr1 = ast_for_expr(c, CHILD(n, 1)); 3447 if (!expr1) 3448 return NULL; 3449 expr2 = ast_for_expr(c, CHILD(n, 3)); 3450 if (!expr2) 3451 return NULL; 3452 3453 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena); 3454 } 3455 PyErr_Format(PyExc_SystemError, 3456 "improper number of parts to 'assert' statement: %d", 3457 NCH(n)); 3458 return NULL; 3459 } 3460 3461 static asdl_seq * 3462 ast_for_suite(struct compiling *c, const node *n) 3463 { 3464 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */ 3465 asdl_seq *seq; 3466 stmt_ty s; 3467 int i, total, num, end, pos = 0; 3468 node *ch; 3469 3470 REQ(n, suite); 3471 3472 total = num_stmts(n); 3473 seq = _Py_asdl_seq_new(total, c->c_arena); 3474 if (!seq) 3475 return NULL; 3476 if (TYPE(CHILD(n, 0)) == simple_stmt) { 3477 n = CHILD(n, 0); 3478 /* simple_stmt always ends with a NEWLINE, 3479 and may have a trailing SEMI 3480 */ 3481 end = NCH(n) - 1; 3482 if (TYPE(CHILD(n, end - 1)) == SEMI) 3483 end--; 3484 /* loop by 2 to skip semi-colons */ 3485 for (i = 0; i < end; i += 2) { 3486 ch = CHILD(n, i); 3487 s = ast_for_stmt(c, ch); 3488 if (!s) 3489 return NULL; 3490 asdl_seq_SET(seq, pos++, s); 3491 } 3492 } 3493 else { 3494 for (i = 2; i < (NCH(n) - 1); i++) { 3495 ch = 
/* Convert an if_stmt CST node into an If statement.

   Three shapes are handled:
     - exactly four children: a plain 'if' with no 'elif'/'else';
     - fifth child is 'else': an if/else pair;
     - fifth child is 'elif': a chain, built back-to-front so that each
       'elif' becomes an If node stored as the sole element of the
       previous If's orelse sequence.

   Returns NULL with an exception set on failure. */
static stmt_ty
ast_for_if_stmt(struct compiling *c, const node *n)
{
    /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
       ['else' ':' suite]
    */
    char *s;

    REQ(n, if_stmt);

    /* 'if' test ':' suite -- nothing follows the first suite. */
    if (NCH(n) == 4) {
        expr_ty expression;
        asdl_seq *suite_seq;

        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        suite_seq = ast_for_suite(c, CHILD(n, 3));
        if (!suite_seq)
            return NULL;

        return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
                  c->c_arena);
    }

    /* Child 4 is the keyword that follows the first suite. */
    s = STR(CHILD(n, 4));
    /* s[2], the third character in the string, will be
       's' for el_s_e, or
       'i' for el_i_f
    */
    if (s[2] == 's') {
        /* 'if' test ':' suite 'else' ':' suite -- the else suite is child 6. */
        expr_ty expression;
        asdl_seq *seq1, *seq2;

        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        seq1 = ast_for_suite(c, CHILD(n, 3));
        if (!seq1)
            return NULL;
        seq2 = ast_for_suite(c, CHILD(n, 6));
        if (!seq2)
            return NULL;

        return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
                  c->c_arena);
    }
    else if (s[2] == 'i') {
        int i, n_elif, has_else = 0;
        expr_ty expression;
        asdl_seq *suite_seq;
        asdl_seq *orelse = NULL;
        /* Number of children after the leading 'if' test ':' suite. */
        n_elif = NCH(n) - 4;
        /* must reference the child n_elif+1 since 'else' token is third,
           not fourth, child from the end. */
        if (TYPE(CHILD(n, (n_elif + 1))) == NAME
            && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
            has_else = 1;
            /* Discount the three children of the trailing 'else' ':' suite. */
            n_elif -= 3;
        }
        /* Each elif clause contributes four children. */
        n_elif /= 4;

        if (has_else) {
            asdl_seq *suite_seq2;

            /* Fold the last 'elif' together with the trailing 'else' into
               one If node; it becomes the innermost orelse of the chain. */
            orelse = _Py_asdl_seq_new(1, c->c_arena);
            if (!orelse)
                return NULL;
            /* Counting from the end: test at -6, elif suite at -4,
               else suite at -1. */
            expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
            if (!expression)
                return NULL;
            suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
            if (!suite_seq)
                return NULL;
            suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
            if (!suite_seq2)
                return NULL;

            asdl_seq_SET(orelse, 0,
                         If(expression, suite_seq, suite_seq2,
                            LINENO(CHILD(n, NCH(n) - 6)),
                            CHILD(n, NCH(n) - 6)->n_col_offset,
                            c->c_arena));
            /* the just-created orelse handled the last elif */
            n_elif--;
        }

        /* Wrap the remaining elif clauses from last to first, so each new
           If node takes the previously built one as its orelse. */
        for (i = 0; i < n_elif; i++) {
            /* Index of this elif's test; the first elif test is child 5. */
            int off = 5 + (n_elif - i - 1) * 4;
            asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
            if (!newobj)
                return NULL;
            expression = ast_for_expr(c, CHILD(n, off));
            if (!expression)
                return NULL;
            suite_seq = ast_for_suite(c, CHILD(n, off + 2));
            if (!suite_seq)
                return NULL;

            asdl_seq_SET(newobj, 0,
                         If(expression, suite_seq, orelse,
                            LINENO(CHILD(n, off)),
                            CHILD(n, off)->n_col_offset, c->c_arena));
            orelse = newobj;
        }
        /* Finally build the outermost 'if' with the whole chain as orelse. */
        expression = ast_for_expr(c, CHILD(n, 1));
        if (!expression)
            return NULL;
        suite_seq = ast_for_suite(c, CHILD(n, 3));
        if (!suite_seq)
            return NULL;
        return If(expression, suite_seq, orelse,
                  LINENO(n), n->n_col_offset, c->c_arena);
    }

    /* The fifth child was neither 'else' nor 'elif'. */
    PyErr_Format(PyExc_SystemError,
                 "unexpected token in 'if' statement: %s", s);
    return NULL;
}
'while' test ':' suite ['else' ':' suite] */ 3651 REQ(n, while_stmt); 3652 3653 if (NCH(n) == 4) { 3654 expr_ty expression; 3655 asdl_seq *suite_seq; 3656 3657 expression = ast_for_expr(c, CHILD(n, 1)); 3658 if (!expression) 3659 return NULL; 3660 suite_seq = ast_for_suite(c, CHILD(n, 3)); 3661 if (!suite_seq) 3662 return NULL; 3663 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena); 3664 } 3665 else if (NCH(n) == 7) { 3666 expr_ty expression; 3667 asdl_seq *seq1, *seq2; 3668 3669 expression = ast_for_expr(c, CHILD(n, 1)); 3670 if (!expression) 3671 return NULL; 3672 seq1 = ast_for_suite(c, CHILD(n, 3)); 3673 if (!seq1) 3674 return NULL; 3675 seq2 = ast_for_suite(c, CHILD(n, 6)); 3676 if (!seq2) 3677 return NULL; 3678 3679 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena); 3680 } 3681 3682 PyErr_Format(PyExc_SystemError, 3683 "wrong number of tokens for 'while' statement: %d", 3684 NCH(n)); 3685 return NULL; 3686 } 3687 3688 static stmt_ty 3689 ast_for_for_stmt(struct compiling *c, const node *n, int is_async) 3690 { 3691 asdl_seq *_target, *seq = NULL, *suite_seq; 3692 expr_ty expression; 3693 expr_ty target, first; 3694 const node *node_target; 3695 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */ 3696 REQ(n, for_stmt); 3697 3698 if (NCH(n) == 9) { 3699 seq = ast_for_suite(c, CHILD(n, 8)); 3700 if (!seq) 3701 return NULL; 3702 } 3703 3704 node_target = CHILD(n, 1); 3705 _target = ast_for_exprlist(c, node_target, Store); 3706 if (!_target) 3707 return NULL; 3708 /* Check the # of children rather than the length of _target, since 3709 for x, in ... has 1 element in _target, but still requires a Tuple. 
*/ 3710 first = (expr_ty)asdl_seq_GET(_target, 0); 3711 if (NCH(node_target) == 1) 3712 target = first; 3713 else 3714 target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena); 3715 3716 expression = ast_for_testlist(c, CHILD(n, 3)); 3717 if (!expression) 3718 return NULL; 3719 suite_seq = ast_for_suite(c, CHILD(n, 5)); 3720 if (!suite_seq) 3721 return NULL; 3722 3723 if (is_async) 3724 return AsyncFor(target, expression, suite_seq, seq, 3725 LINENO(n), n->n_col_offset, 3726 c->c_arena); 3727 else 3728 return For(target, expression, suite_seq, seq, 3729 LINENO(n), n->n_col_offset, 3730 c->c_arena); 3731 } 3732 3733 static excepthandler_ty 3734 ast_for_except_clause(struct compiling *c, const node *exc, node *body) 3735 { 3736 /* except_clause: 'except' [test ['as' test]] */ 3737 REQ(exc, except_clause); 3738 REQ(body, suite); 3739 3740 if (NCH(exc) == 1) { 3741 asdl_seq *suite_seq = ast_for_suite(c, body); 3742 if (!suite_seq) 3743 return NULL; 3744 3745 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc), 3746 exc->n_col_offset, c->c_arena); 3747 } 3748 else if (NCH(exc) == 2) { 3749 expr_ty expression; 3750 asdl_seq *suite_seq; 3751 3752 expression = ast_for_expr(c, CHILD(exc, 1)); 3753 if (!expression) 3754 return NULL; 3755 suite_seq = ast_for_suite(c, body); 3756 if (!suite_seq) 3757 return NULL; 3758 3759 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc), 3760 exc->n_col_offset, c->c_arena); 3761 } 3762 else if (NCH(exc) == 4) { 3763 asdl_seq *suite_seq; 3764 expr_ty expression; 3765 identifier e = NEW_IDENTIFIER(CHILD(exc, 3)); 3766 if (!e) 3767 return NULL; 3768 if (forbidden_name(c, e, CHILD(exc, 3), 0)) 3769 return NULL; 3770 expression = ast_for_expr(c, CHILD(exc, 1)); 3771 if (!expression) 3772 return NULL; 3773 suite_seq = ast_for_suite(c, body); 3774 if (!suite_seq) 3775 return NULL; 3776 3777 return ExceptHandler(expression, e, suite_seq, LINENO(exc), 3778 exc->n_col_offset, c->c_arena); 3779 } 3780 3781 
PyErr_Format(PyExc_SystemError, 3782 "wrong number of children for 'except' clause: %d", 3783 NCH(exc)); 3784 return NULL; 3785 } 3786 3787 static stmt_ty 3788 ast_for_try_stmt(struct compiling *c, const node *n) 3789 { 3790 const int nch = NCH(n); 3791 int n_except = (nch - 3)/3; 3792 asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL; 3793 3794 REQ(n, try_stmt); 3795 3796 body = ast_for_suite(c, CHILD(n, 2)); 3797 if (body == NULL) 3798 return NULL; 3799 3800 if (TYPE(CHILD(n, nch - 3)) == NAME) { 3801 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) { 3802 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { 3803 /* we can assume it's an "else", 3804 because nch >= 9 for try-else-finally and 3805 it would otherwise have a type of except_clause */ 3806 orelse = ast_for_suite(c, CHILD(n, nch - 4)); 3807 if (orelse == NULL) 3808 return NULL; 3809 n_except--; 3810 } 3811 3812 finally = ast_for_suite(c, CHILD(n, nch - 1)); 3813 if (finally == NULL) 3814 return NULL; 3815 n_except--; 3816 } 3817 else { 3818 /* we can assume it's an "else", 3819 otherwise it would have a type of except_clause */ 3820 orelse = ast_for_suite(c, CHILD(n, nch - 1)); 3821 if (orelse == NULL) 3822 return NULL; 3823 n_except--; 3824 } 3825 } 3826 else if (TYPE(CHILD(n, nch - 3)) != except_clause) { 3827 ast_error(c, n, "malformed 'try' statement"); 3828 return NULL; 3829 } 3830 3831 if (n_except > 0) { 3832 int i; 3833 /* process except statements to create a try ... 
except */ 3834 handlers = _Py_asdl_seq_new(n_except, c->c_arena); 3835 if (handlers == NULL) 3836 return NULL; 3837 3838 for (i = 0; i < n_except; i++) { 3839 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3), 3840 CHILD(n, 5 + i * 3)); 3841 if (!e) 3842 return NULL; 3843 asdl_seq_SET(handlers, i, e); 3844 } 3845 } 3846 3847 assert(finally != NULL || asdl_seq_LEN(handlers)); 3848 return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena); 3849 } 3850 3851 /* with_item: test ['as' expr] */ 3852 static withitem_ty 3853 ast_for_with_item(struct compiling *c, const node *n) 3854 { 3855 expr_ty context_expr, optional_vars = NULL; 3856 3857 REQ(n, with_item); 3858 context_expr = ast_for_expr(c, CHILD(n, 0)); 3859 if (!context_expr) 3860 return NULL; 3861 if (NCH(n) == 3) { 3862 optional_vars = ast_for_expr(c, CHILD(n, 2)); 3863 3864 if (!optional_vars) { 3865 return NULL; 3866 } 3867 if (!set_context(c, optional_vars, Store, n)) { 3868 return NULL; 3869 } 3870 } 3871 3872 return withitem(context_expr, optional_vars, c->c_arena); 3873 } 3874 3875 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */ 3876 static stmt_ty 3877 ast_for_with_stmt(struct compiling *c, const node *n, int is_async) 3878 { 3879 int i, n_items; 3880 asdl_seq *items, *body; 3881 3882 REQ(n, with_stmt); 3883 3884 n_items = (NCH(n) - 2) / 2; 3885 items = _Py_asdl_seq_new(n_items, c->c_arena); 3886 if (!items) 3887 return NULL; 3888 for (i = 1; i < NCH(n) - 2; i += 2) { 3889 withitem_ty item = ast_for_with_item(c, CHILD(n, i)); 3890 if (!item) 3891 return NULL; 3892 asdl_seq_SET(items, (i - 1) / 2, item); 3893 } 3894 3895 body = ast_for_suite(c, CHILD(n, NCH(n) - 1)); 3896 if (!body) 3897 return NULL; 3898 3899 if (is_async) 3900 return AsyncWith(items, body, LINENO(n), n->n_col_offset, c->c_arena); 3901 else 3902 return With(items, body, LINENO(n), n->n_col_offset, c->c_arena); 3903 } 3904 3905 static stmt_ty 3906 ast_for_classdef(struct compiling *c, 
const node *n, asdl_seq *decorator_seq) 3907 { 3908 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */ 3909 PyObject *classname; 3910 asdl_seq *s; 3911 expr_ty call; 3912 3913 REQ(n, classdef); 3914 3915 if (NCH(n) == 4) { /* class NAME ':' suite */ 3916 s = ast_for_suite(c, CHILD(n, 3)); 3917 if (!s) 3918 return NULL; 3919 classname = NEW_IDENTIFIER(CHILD(n, 1)); 3920 if (!classname) 3921 return NULL; 3922 if (forbidden_name(c, classname, CHILD(n, 3), 0)) 3923 return NULL; 3924 return ClassDef(classname, NULL, NULL, s, decorator_seq, LINENO(n), 3925 n->n_col_offset, c->c_arena); 3926 } 3927 3928 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */ 3929 s = ast_for_suite(c, CHILD(n,5)); 3930 if (!s) 3931 return NULL; 3932 classname = NEW_IDENTIFIER(CHILD(n, 1)); 3933 if (!classname) 3934 return NULL; 3935 if (forbidden_name(c, classname, CHILD(n, 3), 0)) 3936 return NULL; 3937 return ClassDef(classname, NULL, NULL, s, decorator_seq, LINENO(n), 3938 n->n_col_offset, c->c_arena); 3939 } 3940 3941 /* class NAME '(' arglist ')' ':' suite */ 3942 /* build up a fake Call node so we can extract its pieces */ 3943 { 3944 PyObject *dummy_name; 3945 expr_ty dummy; 3946 dummy_name = NEW_IDENTIFIER(CHILD(n, 1)); 3947 if (!dummy_name) 3948 return NULL; 3949 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena); 3950 call = ast_for_call(c, CHILD(n, 3), dummy); 3951 if (!call) 3952 return NULL; 3953 } 3954 s = ast_for_suite(c, CHILD(n, 6)); 3955 if (!s) 3956 return NULL; 3957 classname = NEW_IDENTIFIER(CHILD(n, 1)); 3958 if (!classname) 3959 return NULL; 3960 if (forbidden_name(c, classname, CHILD(n, 1), 0)) 3961 return NULL; 3962 3963 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s, 3964 decorator_seq, LINENO(n), n->n_col_offset, c->c_arena); 3965 } 3966 3967 static stmt_ty 3968 ast_for_stmt(struct compiling *c, const node *n) 3969 { 3970 if (TYPE(n) == stmt) { 3971 assert(NCH(n) == 1); 3972 n = CHILD(n, 0); 3973 } 
3974 if (TYPE(n) == simple_stmt) { 3975 assert(num_stmts(n) == 1); 3976 n = CHILD(n, 0); 3977 } 3978 if (TYPE(n) == small_stmt) { 3979 n = CHILD(n, 0); 3980 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt 3981 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt 3982 */ 3983 switch (TYPE(n)) { 3984 case expr_stmt: 3985 return ast_for_expr_stmt(c, n); 3986 case del_stmt: 3987 return ast_for_del_stmt(c, n); 3988 case pass_stmt: 3989 return Pass(LINENO(n), n->n_col_offset, c->c_arena); 3990 case flow_stmt: 3991 return ast_for_flow_stmt(c, n); 3992 case import_stmt: 3993 return ast_for_import_stmt(c, n); 3994 case global_stmt: 3995 return ast_for_global_stmt(c, n); 3996 case nonlocal_stmt: 3997 return ast_for_nonlocal_stmt(c, n); 3998 case assert_stmt: 3999 return ast_for_assert_stmt(c, n); 4000 default: 4001 PyErr_Format(PyExc_SystemError, 4002 "unhandled small_stmt: TYPE=%d NCH=%d\n", 4003 TYPE(n), NCH(n)); 4004 return NULL; 4005 } 4006 } 4007 else { 4008 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt 4009 | funcdef | classdef | decorated | async_stmt 4010 */ 4011 node *ch = CHILD(n, 0); 4012 REQ(n, compound_stmt); 4013 switch (TYPE(ch)) { 4014 case if_stmt: 4015 return ast_for_if_stmt(c, ch); 4016 case while_stmt: 4017 return ast_for_while_stmt(c, ch); 4018 case for_stmt: 4019 return ast_for_for_stmt(c, ch, 0); 4020 case try_stmt: 4021 return ast_for_try_stmt(c, ch); 4022 case with_stmt: 4023 return ast_for_with_stmt(c, ch, 0); 4024 case funcdef: 4025 return ast_for_funcdef(c, ch, NULL); 4026 case classdef: 4027 return ast_for_classdef(c, ch, NULL); 4028 case decorated: 4029 return ast_for_decorated(c, ch); 4030 case async_stmt: 4031 return ast_for_async_stmt(c, ch); 4032 default: 4033 PyErr_Format(PyExc_SystemError, 4034 "unhandled small_stmt: TYPE=%d NCH=%d\n", 4035 TYPE(n), NCH(n)); 4036 return NULL; 4037 } 4038 } 4039 } 4040 4041 static PyObject * 4042 parsenumber_raw(struct compiling *c, const char *s) 4043 { 4044 const char *end; 
4045 long x; 4046 double dx; 4047 Py_complex compl; 4048 int imflag; 4049 4050 assert(s != NULL); 4051 errno = 0; 4052 end = s + strlen(s) - 1; 4053 imflag = *end == 'j' || *end == 'J'; 4054 if (s[0] == '0') { 4055 x = (long) PyOS_strtoul(s, (char **)&end, 0); 4056 if (x < 0 && errno == 0) { 4057 return PyLong_FromString(s, (char **)0, 0); 4058 } 4059 } 4060 else 4061 x = PyOS_strtol(s, (char **)&end, 0); 4062 if (*end == '\0') { 4063 if (errno != 0) 4064 return PyLong_FromString(s, (char **)0, 0); 4065 return PyLong_FromLong(x); 4066 } 4067 /* XXX Huge floats may silently fail */ 4068 if (imflag) { 4069 compl.real = 0.; 4070 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); 4071 if (compl.imag == -1.0 && PyErr_Occurred()) 4072 return NULL; 4073 return PyComplex_FromCComplex(compl); 4074 } 4075 else 4076 { 4077 dx = PyOS_string_to_double(s, NULL, NULL); 4078 if (dx == -1.0 && PyErr_Occurred()) 4079 return NULL; 4080 return PyFloat_FromDouble(dx); 4081 } 4082 } 4083 4084 static PyObject * 4085 parsenumber(struct compiling *c, const char *s) 4086 { 4087 char *dup, *end; 4088 PyObject *res = NULL; 4089 4090 assert(s != NULL); 4091 4092 if (strchr(s, '_') == NULL) { 4093 return parsenumber_raw(c, s); 4094 } 4095 /* Create a duplicate without underscores. */ 4096 dup = PyMem_Malloc(strlen(s) + 1); 4097 end = dup; 4098 for (; *s; s++) { 4099 if (*s != '_') { 4100 *end++ = *s; 4101 } 4102 } 4103 *end = '\0'; 4104 res = parsenumber_raw(c, dup); 4105 PyMem_Free(dup); 4106 return res; 4107 } 4108 4109 static PyObject * 4110 decode_utf8(struct compiling *c, const char **sPtr, const char *end) 4111 { 4112 const char *s, *t; 4113 t = s = *sPtr; 4114 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." 
*/ 4115 while (s < end && (*s & 0x80)) s++; 4116 *sPtr = s; 4117 return PyUnicode_DecodeUTF8(t, s - t, NULL); 4118 } 4119 4120 static int 4121 warn_invalid_escape_sequence(struct compiling *c, const node *n, 4122 char first_invalid_escape_char) 4123 { 4124 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c", 4125 first_invalid_escape_char); 4126 if (msg == NULL) { 4127 return -1; 4128 } 4129 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, 4130 c->c_filename, LINENO(n), 4131 NULL, NULL) < 0 && 4132 PyErr_ExceptionMatches(PyExc_DeprecationWarning)) 4133 { 4134 const char *s; 4135 4136 /* Replace the DeprecationWarning exception with a SyntaxError 4137 to get a more accurate error report */ 4138 PyErr_Clear(); 4139 4140 s = PyUnicode_AsUTF8(msg); 4141 if (s != NULL) { 4142 ast_error(c, n, s); 4143 } 4144 Py_DECREF(msg); 4145 return -1; 4146 } 4147 Py_DECREF(msg); 4148 return 0; 4149 } 4150 4151 static PyObject * 4152 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, 4153 size_t len) 4154 { 4155 PyObject *v, *u; 4156 char *buf; 4157 char *p; 4158 const char *end; 4159 4160 /* check for integer overflow */ 4161 if (len > SIZE_MAX / 6) 4162 return NULL; 4163 /* "" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 4164 "\" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ 4165 u = PyBytes_FromStringAndSize((char *)NULL, len * 6); 4166 if (u == NULL) 4167 return NULL; 4168 p = buf = PyBytes_AsString(u); 4169 end = s + len; 4170 while (s < end) { 4171 if (*s == '\\') { 4172 *p++ = *s++; 4173 if (*s & 0x80) { 4174 strcpy(p, "u005c"); 4175 p += 5; 4176 } 4177 } 4178 if (*s & 0x80) { /* XXX inefficient */ 4179 PyObject *w; 4180 int kind; 4181 void *data; 4182 Py_ssize_t len, i; 4183 w = decode_utf8(c, &s, end); 4184 if (w == NULL) { 4185 Py_DECREF(u); 4186 return NULL; 4187 } 4188 kind = PyUnicode_KIND(w); 4189 data = PyUnicode_DATA(w); 4190 len = PyUnicode_GET_LENGTH(w); 4191 for (i = 0; i < len; i++) { 
4192 Py_UCS4 chr = PyUnicode_READ(kind, data, i); 4193 sprintf(p, "\\U%08x", chr); 4194 p += 10; 4195 } 4196 /* Should be impossible to overflow */ 4197 assert(p - buf <= Py_SIZE(u)); 4198 Py_DECREF(w); 4199 } else { 4200 *p++ = *s++; 4201 } 4202 } 4203 len = p - buf; 4204 s = buf; 4205 4206 const char *first_invalid_escape; 4207 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape); 4208 4209 if (v != NULL && first_invalid_escape != NULL) { 4210 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { 4211 /* We have not decref u before because first_invalid_escape points 4212 inside u. */ 4213 Py_XDECREF(u); 4214 Py_DECREF(v); 4215 return NULL; 4216 } 4217 } 4218 Py_XDECREF(u); 4219 return v; 4220 } 4221 4222 static PyObject * 4223 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s, 4224 size_t len) 4225 { 4226 const char *first_invalid_escape; 4227 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL, 4228 &first_invalid_escape); 4229 if (result == NULL) 4230 return NULL; 4231 4232 if (first_invalid_escape != NULL) { 4233 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { 4234 Py_DECREF(result); 4235 return NULL; 4236 } 4237 } 4238 return result; 4239 } 4240 4241 /* Compile this expression in to an expr_ty. Add parens around the 4242 expression, in order to allow leading spaces in the expression. */ 4243 static expr_ty 4244 fstring_compile_expr(const char *expr_start, const char *expr_end, 4245 struct compiling *c, const node *n) 4246 4247 { 4248 int all_whitespace = 1; 4249 int kind; 4250 void *data; 4251 PyCompilerFlags cf; 4252 mod_ty mod; 4253 char *str; 4254 PyObject *o; 4255 Py_ssize_t len; 4256 Py_ssize_t i; 4257 4258 assert(expr_end >= expr_start); 4259 assert(*(expr_start-1) == '{'); 4260 assert(*expr_end == '}' || *expr_end == '!' 
|| *expr_end == ':'); 4261 4262 /* We know there are no escapes here, because backslashes are not allowed, 4263 and we know it's utf-8 encoded (per PEP 263). But, in order to check 4264 that each char is not whitespace, we need to decode it to unicode. 4265 Which is unfortunate, but such is life. */ 4266 4267 /* If the substring is all whitespace, it's an error. We need to catch 4268 this here, and not when we call PyParser_ASTFromString, because turning 4269 the expression '' in to '()' would go from being invalid to valid. */ 4270 /* Note that this code says an empty string is all whitespace. That's 4271 important. There's a test for it: f'{}'. */ 4272 o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL); 4273 if (o == NULL) 4274 return NULL; 4275 len = PyUnicode_GET_LENGTH(o); 4276 kind = PyUnicode_KIND(o); 4277 data = PyUnicode_DATA(o); 4278 for (i = 0; i < len; i++) { 4279 if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) { 4280 all_whitespace = 0; 4281 break; 4282 } 4283 } 4284 Py_DECREF(o); 4285 if (all_whitespace) { 4286 ast_error(c, n, "f-string: empty expression not allowed"); 4287 return NULL; 4288 } 4289 4290 /* Reuse len to be the length of the utf-8 input string. */ 4291 len = expr_end - expr_start; 4292 /* Allocate 3 extra bytes: open paren, close paren, null byte. */ 4293 str = PyMem_RawMalloc(len + 3); 4294 if (str == NULL) 4295 return NULL; 4296 4297 str[0] = '('; 4298 memcpy(str+1, expr_start, len); 4299 str[len+1] = ')'; 4300 str[len+2] = 0; 4301 4302 cf.cf_flags = PyCF_ONLY_AST; 4303 mod = PyParser_ASTFromString(str, "<fstring>", 4304 Py_eval_input, &cf, c->c_arena); 4305 PyMem_RawFree(str); 4306 if (!mod) 4307 return NULL; 4308 return mod->v.Expression.body; 4309 } 4310 4311 /* Return -1 on error. 4312 4313 Return 0 if we reached the end of the literal. 4314 4315 Return 1 if we haven't reached the end of the literal, but we want 4316 the caller to process the literal up to this point. Used for 4317 doubled braces. 
/* Scan the literal (non-expression) part of an f-string starting at *str.

   Return -1 on error.

   Return 0 if we reached the end of the literal.

   Return 1 if we haven't reached the end of the literal, but we want
   the caller to process the literal up to this point.  Used for
   doubled braces.

   On return *str has been advanced past the scanned text.  If that text
   is non-empty it is decoded and stored in *literal, which must be NULL
   on entry; otherwise *literal is left untouched.  When raw is non-zero
   the text is decoded as plain UTF-8, otherwise backslash escapes are
   processed as well. */
static int
fstring_find_literal(const char **str, const char *end, int raw,
                     PyObject **literal, int recurse_lvl,
                     struct compiling *c, const node *n)
{
    /* Get any literal string. It ends when we hit an un-doubled left
       brace (which isn't part of a unicode name escape such as
       "\N{EULER CONSTANT}"), or the end of the string. */

    const char *literal_start = *str;
    const char *literal_end;
    /* Non-zero while between the braces of a \N{...} escape. */
    int in_named_escape = 0;
    int result = 0;

    assert(*literal == NULL);
    for (; *str < end; (*str)++) {
        char ch = **str;
        /* A '{' directly preceded by "\N" opens a named escape; the length
           test keeps the two look-behind reads inside this literal. */
        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
            *(*str-2) == '\\' && *(*str-1) == 'N') {
            in_named_escape = 1;
        } else if (in_named_escape && ch == '}') {
            in_named_escape = 0;
        } else if (ch == '{' || ch == '}') {
            /* Check for doubled braces, but only at the top level. If
               we checked at every level, then f'{0:{3}}' would fail
               with the two closing braces. */
            if (recurse_lvl == 0) {
                if (*str+1 < end && *(*str+1) == ch) {
                    /* We're going to tell the caller that the literal ends
                       here, but that they should continue scanning. But also
                       skip over the second brace when we resume scanning. */
                    literal_end = *str+1;
                    *str += 2;
                    result = 1;
                    goto done;
                }

                /* Where a single '{' is the start of a new expression, a
                   single '}' is not allowed. */
                if (ch == '}') {
                    ast_error(c, n, "f-string: single '}' is not allowed");
                    return -1;
                }
            }
            /* We're either at a '{', which means we're starting another
               expression; or a '}', which means we're at the end of this
               f-string (for a nested format_spec). */
            break;
        }
    }
    /* Normal exit: the literal stops at the brace, or at end. */
    literal_end = *str;
    assert(*str <= end);
    assert(*str == end || **str == '{' || **str == '}');
done:
    /* Only build an object when some text was actually scanned. */
    if (literal_start != literal_end) {
        if (raw)
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                    literal_end-literal_start,
                                                    NULL, NULL);
        else
            *literal = decode_unicode_with_escapes(c, n, literal_start,
                                                   literal_end-literal_start);
        if (!*literal)
            return -1;
    }
    return result;
}
*/ 4419 int string_type = 0; 4420 4421 /* Keep track of nesting level for braces/parens/brackets in 4422 expressions. */ 4423 Py_ssize_t nested_depth = 0; 4424 4425 /* Can only nest one level deep. */ 4426 if (recurse_lvl >= 2) { 4427 ast_error(c, n, "f-string: expressions nested too deeply"); 4428 return -1; 4429 } 4430 4431 /* The first char must be a left brace, or we wouldn't have gotten 4432 here. Skip over it. */ 4433 assert(**str == '{'); 4434 *str += 1; 4435 4436 expr_start = *str; 4437 for (; *str < end; (*str)++) { 4438 char ch; 4439 4440 /* Loop invariants. */ 4441 assert(nested_depth >= 0); 4442 assert(*str >= expr_start && *str < end); 4443 if (quote_char) 4444 assert(string_type == 1 || string_type == 3); 4445 else 4446 assert(string_type == 0); 4447 4448 ch = **str; 4449 /* Nowhere inside an expression is a backslash allowed. */ 4450 if (ch == '\\') { 4451 /* Error: can't include a backslash character, inside 4452 parens or strings or not. */ 4453 ast_error(c, n, "f-string expression part " 4454 "cannot include a backslash"); 4455 return -1; 4456 } 4457 if (quote_char) { 4458 /* We're inside a string. See if we're at the end. */ 4459 /* This code needs to implement the same non-error logic 4460 as tok_get from tokenizer.c, at the letter_quote 4461 label. To actually share that code would be a 4462 nightmare. But, it's unlikely to change and is small, 4463 so duplicate it here. Note we don't need to catch all 4464 of the errors, since they'll be caught when parsing the 4465 expression. We just need to match the non-error 4466 cases. Thus we can ignore \n in single-quoted strings, 4467 for example. Or non-terminated strings. */ 4468 if (ch == quote_char) { 4469 /* Does this match the string_type (single or triple 4470 quoted)? */ 4471 if (string_type == 3) { 4472 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) { 4473 /* We're at the end of a triple quoted string. 
*/ 4474 *str += 2; 4475 string_type = 0; 4476 quote_char = 0; 4477 continue; 4478 } 4479 } else { 4480 /* We're at the end of a normal string. */ 4481 quote_char = 0; 4482 string_type = 0; 4483 continue; 4484 } 4485 } 4486 } else if (ch == '\'' || ch == '"') { 4487 /* Is this a triple quoted string? */ 4488 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) { 4489 string_type = 3; 4490 *str += 2; 4491 } else { 4492 /* Start of a normal string. */ 4493 string_type = 1; 4494 } 4495 /* Start looking for the end of the string. */ 4496 quote_char = ch; 4497 } else if (ch == '[' || ch == '{' || ch == '(') { 4498 nested_depth++; 4499 } else if (nested_depth != 0 && 4500 (ch == ']' || ch == '}' || ch == ')')) { 4501 nested_depth--; 4502 } else if (ch == '#') { 4503 /* Error: can't include a comment character, inside parens 4504 or not. */ 4505 ast_error(c, n, "f-string expression part cannot include '#'"); 4506 return -1; 4507 } else if (nested_depth == 0 && 4508 (ch == '!' || ch == ':' || ch == '}')) { 4509 /* First, test for the special case of "!=". Since '=' is 4510 not an allowed conversion character, nothing is lost in 4511 this test. */ 4512 if (ch == '!' && *str+1 < end && *(*str+1) == '=') { 4513 /* This isn't a conversion character, just continue. */ 4514 continue; 4515 } 4516 /* Normal way out of this loop. */ 4517 break; 4518 } else { 4519 /* Just consume this char and loop around. */ 4520 } 4521 } 4522 expr_end = *str; 4523 /* If we leave this loop in a string or with mismatched parens, we 4524 don't care. We'll get a syntax error when compiling the 4525 expression. 
But, we can produce a better error message, so 4526 let's just do that.*/ 4527 if (quote_char) { 4528 ast_error(c, n, "f-string: unterminated string"); 4529 return -1; 4530 } 4531 if (nested_depth) { 4532 ast_error(c, n, "f-string: mismatched '(', '{', or '['"); 4533 return -1; 4534 } 4535 4536 if (*str >= end) 4537 goto unexpected_end_of_string; 4538 4539 /* Compile the expression as soon as possible, so we show errors 4540 related to the expression before errors related to the 4541 conversion or format_spec. */ 4542 simple_expression = fstring_compile_expr(expr_start, expr_end, c, n); 4543 if (!simple_expression) 4544 return -1; 4545 4546 /* Check for a conversion char, if present. */ 4547 if (**str == '!') { 4548 *str += 1; 4549 if (*str >= end) 4550 goto unexpected_end_of_string; 4551 4552 conversion = **str; 4553 *str += 1; 4554 4555 /* Validate the conversion. */ 4556 if (!(conversion == 's' || conversion == 'r' 4557 || conversion == 'a')) { 4558 ast_error(c, n, "f-string: invalid conversion character: " 4559 "expected 's', 'r', or 'a'"); 4560 return -1; 4561 } 4562 } 4563 4564 /* Check for the format spec, if present. */ 4565 if (*str >= end) 4566 goto unexpected_end_of_string; 4567 if (**str == ':') { 4568 *str += 1; 4569 if (*str >= end) 4570 goto unexpected_end_of_string; 4571 4572 /* Parse the format spec. */ 4573 format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n); 4574 if (!format_spec) 4575 return -1; 4576 } 4577 4578 if (*str >= end || **str != '}') 4579 goto unexpected_end_of_string; 4580 4581 /* We're at a right brace. Consume it. */ 4582 assert(*str < end); 4583 assert(**str == '}'); 4584 *str += 1; 4585 4586 /* And now create the FormattedValue node that represents this 4587 entire expression with the conversion and format spec. 
*/ 4588 *expression = FormattedValue(simple_expression, conversion, 4589 format_spec, LINENO(n), n->n_col_offset, 4590 c->c_arena); 4591 if (!*expression) 4592 return -1; 4593 4594 return 0; 4595 4596 unexpected_end_of_string: 4597 ast_error(c, n, "f-string: expecting '}'"); 4598 return -1; 4599 } 4600 4601 /* Return -1 on error. 4602 4603 Return 0 if we have a literal (possible zero length) and an 4604 expression (zero length if at the end of the string. 4605 4606 Return 1 if we have a literal, but no expression, and we want the 4607 caller to call us again. This is used to deal with doubled 4608 braces. 4609 4610 When called multiple times on the string 'a{{b{0}c', this function 4611 will return: 4612 4613 1. the literal 'a{' with no expression, and a return value 4614 of 1. Despite the fact that there's no expression, the return 4615 value of 1 means we're not finished yet. 4616 4617 2. the literal 'b' and the expression '0', with a return value of 4618 0. The fact that there's an expression means we're not finished. 4619 4620 3. literal 'c' with no expression and a return value of 0. The 4621 combination of the return value of 0 with no expression means 4622 we're finished. 4623 */ 4624 static int 4625 fstring_find_literal_and_expr(const char **str, const char *end, int raw, 4626 int recurse_lvl, PyObject **literal, 4627 expr_ty *expression, 4628 struct compiling *c, const node *n) 4629 { 4630 int result; 4631 4632 assert(*literal == NULL && *expression == NULL); 4633 4634 /* Get any literal string. */ 4635 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n); 4636 if (result < 0) 4637 goto error; 4638 4639 assert(result == 0 || result == 1); 4640 4641 if (result == 1) 4642 /* We have a literal, but don't look at the expression. */ 4643 return 1; 4644 4645 if (*str >= end || **str == '}') 4646 /* We're at the end of the string or the end of a nested 4647 f-string: no expression. 
The top-level error case where we 4648 expect to be at the end of the string but we're at a '}' is 4649 handled later. */ 4650 return 0; 4651 4652 /* We must now be the start of an expression, on a '{'. */ 4653 assert(**str == '{'); 4654 4655 if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0) 4656 goto error; 4657 4658 return 0; 4659 4660 error: 4661 Py_CLEAR(*literal); 4662 return -1; 4663 } 4664 4665 #define EXPRLIST_N_CACHED 64 4666 4667 typedef struct { 4668 /* Incrementally build an array of expr_ty, so be used in an 4669 asdl_seq. Cache some small but reasonably sized number of 4670 expr_ty's, and then after that start dynamically allocating, 4671 doubling the number allocated each time. Note that the f-string 4672 f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one 4673 Str for the literal 'a'. So you add expr_ty's about twice as 4674 fast as you add exressions in an f-string. */ 4675 4676 Py_ssize_t allocated; /* Number we've allocated. */ 4677 Py_ssize_t size; /* Number we've used. */ 4678 expr_ty *p; /* Pointer to the memory we're actually 4679 using. Will point to 'data' until we 4680 start dynamically allocating. */ 4681 expr_ty data[EXPRLIST_N_CACHED]; 4682 } ExprList; 4683 4684 #ifdef NDEBUG 4685 #define ExprList_check_invariants(l) 4686 #else 4687 static void 4688 ExprList_check_invariants(ExprList *l) 4689 { 4690 /* Check our invariants. Make sure this object is "live", and 4691 hasn't been deallocated. */ 4692 assert(l->size >= 0); 4693 assert(l->p != NULL); 4694 if (l->size <= EXPRLIST_N_CACHED) 4695 assert(l->data == l->p); 4696 } 4697 #endif 4698 4699 static void 4700 ExprList_Init(ExprList *l) 4701 { 4702 l->allocated = EXPRLIST_N_CACHED; 4703 l->size = 0; 4704 4705 /* Until we start allocating dynamically, p points to data. 
*/ 4706 l->p = l->data; 4707 4708 ExprList_check_invariants(l); 4709 } 4710 4711 static int 4712 ExprList_Append(ExprList *l, expr_ty exp) 4713 { 4714 ExprList_check_invariants(l); 4715 if (l->size >= l->allocated) { 4716 /* We need to alloc (or realloc) the memory. */ 4717 Py_ssize_t new_size = l->allocated * 2; 4718 4719 /* See if we've ever allocated anything dynamically. */ 4720 if (l->p == l->data) { 4721 Py_ssize_t i; 4722 /* We're still using the cached data. Switch to 4723 alloc-ing. */ 4724 l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size); 4725 if (!l->p) 4726 return -1; 4727 /* Copy the cached data into the new buffer. */ 4728 for (i = 0; i < l->size; i++) 4729 l->p[i] = l->data[i]; 4730 } else { 4731 /* Just realloc. */ 4732 expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size); 4733 if (!tmp) { 4734 PyMem_RawFree(l->p); 4735 l->p = NULL; 4736 return -1; 4737 } 4738 l->p = tmp; 4739 } 4740 4741 l->allocated = new_size; 4742 assert(l->allocated == 2 * l->size); 4743 } 4744 4745 l->p[l->size++] = exp; 4746 4747 ExprList_check_invariants(l); 4748 return 0; 4749 } 4750 4751 static void 4752 ExprList_Dealloc(ExprList *l) 4753 { 4754 ExprList_check_invariants(l); 4755 4756 /* If there's been an error, or we've never dynamically allocated, 4757 do nothing. */ 4758 if (!l->p || l->p == l->data) { 4759 /* Do nothing. */ 4760 } else { 4761 /* We have dynamically allocated. Free the memory. */ 4762 PyMem_RawFree(l->p); 4763 } 4764 l->p = NULL; 4765 l->size = -1; 4766 } 4767 4768 static asdl_seq * 4769 ExprList_Finish(ExprList *l, PyArena *arena) 4770 { 4771 asdl_seq *seq; 4772 4773 ExprList_check_invariants(l); 4774 4775 /* Allocate the asdl_seq and copy the expressions in to it. 
*/ 4776 seq = _Py_asdl_seq_new(l->size, arena); 4777 if (seq) { 4778 Py_ssize_t i; 4779 for (i = 0; i < l->size; i++) 4780 asdl_seq_SET(seq, i, l->p[i]); 4781 } 4782 ExprList_Dealloc(l); 4783 return seq; 4784 } 4785 4786 /* The FstringParser is designed to add a mix of strings and 4787 f-strings, and concat them together as needed. Ultimately, it 4788 generates an expr_ty. */ 4789 typedef struct { 4790 PyObject *last_str; 4791 ExprList expr_list; 4792 int fmode; 4793 } FstringParser; 4794 4795 #ifdef NDEBUG 4796 #define FstringParser_check_invariants(state) 4797 #else 4798 static void 4799 FstringParser_check_invariants(FstringParser *state) 4800 { 4801 if (state->last_str) 4802 assert(PyUnicode_CheckExact(state->last_str)); 4803 ExprList_check_invariants(&state->expr_list); 4804 } 4805 #endif 4806 4807 static void 4808 FstringParser_Init(FstringParser *state) 4809 { 4810 state->last_str = NULL; 4811 state->fmode = 0; 4812 ExprList_Init(&state->expr_list); 4813 FstringParser_check_invariants(state); 4814 } 4815 4816 static void 4817 FstringParser_Dealloc(FstringParser *state) 4818 { 4819 FstringParser_check_invariants(state); 4820 4821 Py_XDECREF(state->last_str); 4822 ExprList_Dealloc(&state->expr_list); 4823 } 4824 4825 /* Make a Str node, but decref the PyUnicode object being added. */ 4826 static expr_ty 4827 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n) 4828 { 4829 PyObject *s = *str; 4830 *str = NULL; 4831 assert(PyUnicode_CheckExact(s)); 4832 if (PyArena_AddPyObject(c->c_arena, s) < 0) { 4833 Py_DECREF(s); 4834 return NULL; 4835 } 4836 return Str(s, LINENO(n), n->n_col_offset, c->c_arena); 4837 } 4838 4839 /* Add a non-f-string (that is, a regular literal string). str is 4840 decref'd. 
*/ 4841 static int 4842 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) 4843 { 4844 FstringParser_check_invariants(state); 4845 4846 assert(PyUnicode_CheckExact(str)); 4847 4848 if (PyUnicode_GET_LENGTH(str) == 0) { 4849 Py_DECREF(str); 4850 return 0; 4851 } 4852 4853 if (!state->last_str) { 4854 /* We didn't have a string before, so just remember this one. */ 4855 state->last_str = str; 4856 } else { 4857 /* Concatenate this with the previous string. */ 4858 PyUnicode_AppendAndDel(&state->last_str, str); 4859 if (!state->last_str) 4860 return -1; 4861 } 4862 FstringParser_check_invariants(state); 4863 return 0; 4864 } 4865 4866 /* Parse an f-string. The f-string is in *str to end, with no 4867 'f' or quotes. */ 4868 static int 4869 FstringParser_ConcatFstring(FstringParser *state, const char **str, 4870 const char *end, int raw, int recurse_lvl, 4871 struct compiling *c, const node *n) 4872 { 4873 FstringParser_check_invariants(state); 4874 state->fmode = 1; 4875 4876 /* Parse the f-string. */ 4877 while (1) { 4878 PyObject *literal = NULL; 4879 expr_ty expression = NULL; 4880 4881 /* If there's a zero length literal in front of the 4882 expression, literal will be NULL. If we're at the end of 4883 the f-string, expression will be NULL (unless result == 1, 4884 see below). */ 4885 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl, 4886 &literal, &expression, 4887 c, n); 4888 if (result < 0) 4889 return -1; 4890 4891 /* Add the literal, if any. */ 4892 if (!literal) { 4893 /* Do nothing. Just leave last_str alone (and possibly 4894 NULL). */ 4895 } else if (!state->last_str) { 4896 state->last_str = literal; 4897 literal = NULL; 4898 } else { 4899 /* We have a literal, concatenate it. 
*/ 4900 assert(PyUnicode_GET_LENGTH(literal) != 0); 4901 if (FstringParser_ConcatAndDel(state, literal) < 0) 4902 return -1; 4903 literal = NULL; 4904 } 4905 assert(!state->last_str || 4906 PyUnicode_GET_LENGTH(state->last_str) != 0); 4907 4908 /* We've dealt with the literal now. It can't be leaked on further 4909 errors. */ 4910 assert(literal == NULL); 4911 4912 /* See if we should just loop around to get the next literal 4913 and expression, while ignoring the expression this 4914 time. This is used for un-doubling braces, as an 4915 optimization. */ 4916 if (result == 1) 4917 continue; 4918 4919 if (!expression) 4920 /* We're done with this f-string. */ 4921 break; 4922 4923 /* We know we have an expression. Convert any existing string 4924 to a Str node. */ 4925 if (!state->last_str) { 4926 /* Do nothing. No previous literal. */ 4927 } else { 4928 /* Convert the existing last_str literal to a Str node. */ 4929 expr_ty str = make_str_node_and_del(&state->last_str, c, n); 4930 if (!str || ExprList_Append(&state->expr_list, str) < 0) 4931 return -1; 4932 } 4933 4934 if (ExprList_Append(&state->expr_list, expression) < 0) 4935 return -1; 4936 } 4937 4938 /* If recurse_lvl is zero, then we must be at the end of the 4939 string. Otherwise, we must be at a right brace. */ 4940 4941 if (recurse_lvl == 0 && *str < end-1) { 4942 ast_error(c, n, "f-string: unexpected end of string"); 4943 return -1; 4944 } 4945 if (recurse_lvl != 0 && **str != '}') { 4946 ast_error(c, n, "f-string: expecting '}'"); 4947 return -1; 4948 } 4949 4950 FstringParser_check_invariants(state); 4951 return 0; 4952 } 4953 4954 /* Convert the partial state reflected in last_str and expr_list to an 4955 expr_ty. The expr_ty can be a Str, or a JoinedStr. 
*/ 4956 static expr_ty 4957 FstringParser_Finish(FstringParser *state, struct compiling *c, 4958 const node *n) 4959 { 4960 asdl_seq *seq; 4961 4962 FstringParser_check_invariants(state); 4963 4964 /* If we're just a constant string with no expressions, return 4965 that. */ 4966 if (!state->fmode) { 4967 assert(!state->expr_list.size); 4968 if (!state->last_str) { 4969 /* Create a zero length string. */ 4970 state->last_str = PyUnicode_FromStringAndSize(NULL, 0); 4971 if (!state->last_str) 4972 goto error; 4973 } 4974 return make_str_node_and_del(&state->last_str, c, n); 4975 } 4976 4977 /* Create a Str node out of last_str, if needed. It will be the 4978 last node in our expression list. */ 4979 if (state->last_str) { 4980 expr_ty str = make_str_node_and_del(&state->last_str, c, n); 4981 if (!str || ExprList_Append(&state->expr_list, str) < 0) 4982 goto error; 4983 } 4984 /* This has already been freed. */ 4985 assert(state->last_str == NULL); 4986 4987 seq = ExprList_Finish(&state->expr_list, c->c_arena); 4988 if (!seq) 4989 goto error; 4990 4991 return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena); 4992 4993 error: 4994 FstringParser_Dealloc(state); 4995 return NULL; 4996 } 4997 4998 /* Given an f-string (with no 'f' or quotes) that's in *str and ends 4999 at end, parse it into an expr_ty. Return NULL on error. Adjust 5000 str to point past the parsed portion. */ 5001 static expr_ty 5002 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl, 5003 struct compiling *c, const node *n) 5004 { 5005 FstringParser state; 5006 5007 FstringParser_Init(&state); 5008 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl, 5009 c, n) < 0) { 5010 FstringParser_Dealloc(&state); 5011 return NULL; 5012 } 5013 5014 return FstringParser_Finish(&state, c, n); 5015 } 5016 5017 /* n is a Python string literal, including the bracketing quote 5018 characters, and r, b, u, &/or f prefixes (if any), and embedded 5019 escape sequences (if any). 
parsestr parses it, and sets *result to 5020 decoded Python string object. If the string is an f-string, set 5021 *fstr and *fstrlen to the unparsed string object. Return 0 if no 5022 errors occurred. 5023 */ 5024 static int 5025 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode, 5026 PyObject **result, const char **fstr, Py_ssize_t *fstrlen) 5027 { 5028 size_t len; 5029 const char *s = STR(n); 5030 int quote = Py_CHARMASK(*s); 5031 int fmode = 0; 5032 *bytesmode = 0; 5033 *rawmode = 0; 5034 *result = NULL; 5035 *fstr = NULL; 5036 if (Py_ISALPHA(quote)) { 5037 while (!*bytesmode || !*rawmode) { 5038 if (quote == 'b' || quote == 'B') { 5039 quote = *++s; 5040 *bytesmode = 1; 5041 } 5042 else if (quote == 'u' || quote == 'U') { 5043 quote = *++s; 5044 } 5045 else if (quote == 'r' || quote == 'R') { 5046 quote = *++s; 5047 *rawmode = 1; 5048 } 5049 else if (quote == 'f' || quote == 'F') { 5050 quote = *++s; 5051 fmode = 1; 5052 } 5053 else { 5054 break; 5055 } 5056 } 5057 } 5058 if (fmode && *bytesmode) { 5059 PyErr_BadInternalCall(); 5060 return -1; 5061 } 5062 if (quote != '\'' && quote != '\"') { 5063 PyErr_BadInternalCall(); 5064 return -1; 5065 } 5066 /* Skip the leading quote char. */ 5067 s++; 5068 len = strlen(s); 5069 if (len > INT_MAX) { 5070 PyErr_SetString(PyExc_OverflowError, 5071 "string to parse is too long"); 5072 return -1; 5073 } 5074 if (s[--len] != quote) { 5075 /* Last quote char must match the first. */ 5076 PyErr_BadInternalCall(); 5077 return -1; 5078 } 5079 if (len >= 4 && s[0] == quote && s[1] == quote) { 5080 /* A triple quoted string. We've already skipped one quote at 5081 the start and one at the end of the string. Now skip the 5082 two at the start. */ 5083 s += 2; 5084 len -= 2; 5085 /* And check that the last two match. */ 5086 if (s[--len] != quote || s[--len] != quote) { 5087 PyErr_BadInternalCall(); 5088 return -1; 5089 } 5090 } 5091 5092 if (fmode) { 5093 /* Just return the bytes. 
The caller will parse the resulting 5094 string. */ 5095 *fstr = s; 5096 *fstrlen = len; 5097 return 0; 5098 } 5099 5100 /* Not an f-string. */ 5101 /* Avoid invoking escape decoding routines if possible. */ 5102 *rawmode = *rawmode || strchr(s, '\\') == NULL; 5103 if (*bytesmode) { 5104 /* Disallow non-ASCII characters. */ 5105 const char *ch; 5106 for (ch = s; *ch; ch++) { 5107 if (Py_CHARMASK(*ch) >= 0x80) { 5108 ast_error(c, n, "bytes can only contain ASCII " 5109 "literal characters."); 5110 return -1; 5111 } 5112 } 5113 if (*rawmode) 5114 *result = PyBytes_FromStringAndSize(s, len); 5115 else 5116 *result = decode_bytes_with_escapes(c, n, s, len); 5117 } else { 5118 if (*rawmode) 5119 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); 5120 else 5121 *result = decode_unicode_with_escapes(c, n, s, len); 5122 } 5123 return *result == NULL ? -1 : 0; 5124 } 5125 5126 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through 5127 each STRING atom, and process it as needed. For bytes, just 5128 concatenate them together, and the result will be a Bytes node. For 5129 normal strings and f-strings, concatenate them together. The result 5130 will be a Str node if there were no f-strings; a FormattedValue 5131 node if there's just an f-string (with no leading or trailing 5132 literals), or a JoinedStr node if there are multiple f-strings or 5133 any literals involved. */ 5134 static expr_ty 5135 parsestrplus(struct compiling *c, const node *n) 5136 { 5137 int bytesmode = 0; 5138 PyObject *bytes_str = NULL; 5139 int i; 5140 5141 FstringParser state; 5142 FstringParser_Init(&state); 5143 5144 for (i = 0; i < NCH(n); i++) { 5145 int this_bytesmode; 5146 int this_rawmode; 5147 PyObject *s; 5148 const char *fstr; 5149 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. 
*/ 5150 5151 REQ(CHILD(n, i), STRING); 5152 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s, 5153 &fstr, &fstrlen) != 0) 5154 goto error; 5155 5156 /* Check that we're not mixing bytes with unicode. */ 5157 if (i != 0 && bytesmode != this_bytesmode) { 5158 ast_error(c, n, "cannot mix bytes and nonbytes literals"); 5159 /* s is NULL if the current string part is an f-string. */ 5160 Py_XDECREF(s); 5161 goto error; 5162 } 5163 bytesmode = this_bytesmode; 5164 5165 if (fstr != NULL) { 5166 int result; 5167 assert(s == NULL && !bytesmode); 5168 /* This is an f-string. Parse and concatenate it. */ 5169 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen, 5170 this_rawmode, 0, c, n); 5171 if (result < 0) 5172 goto error; 5173 } else { 5174 /* A string or byte string. */ 5175 assert(s != NULL && fstr == NULL); 5176 5177 assert(bytesmode ? PyBytes_CheckExact(s) : 5178 PyUnicode_CheckExact(s)); 5179 5180 if (bytesmode) { 5181 /* For bytes, concat as we go. */ 5182 if (i == 0) { 5183 /* First time, just remember this value. */ 5184 bytes_str = s; 5185 } else { 5186 PyBytes_ConcatAndDel(&bytes_str, s); 5187 if (!bytes_str) 5188 goto error; 5189 } 5190 } else { 5191 /* This is a regular string. Concatenate it. */ 5192 if (FstringParser_ConcatAndDel(&state, s) < 0) 5193 goto error; 5194 } 5195 } 5196 } 5197 if (bytesmode) { 5198 /* Just return the bytes object and we're done. */ 5199 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0) 5200 goto error; 5201 return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena); 5202 } 5203 5204 /* We're not a bytes string, bytes_str should never have been set. */ 5205 assert(bytes_str == NULL); 5206 5207 return FstringParser_Finish(&state, c, n); 5208 5209 error: 5210 Py_XDECREF(bytes_str); 5211 FstringParser_Dealloc(&state); 5212 return NULL; 5213 } 5214