1 /* csv module */ 2 3 /* 4 5 This module provides the low-level underpinnings of a CSV reading/writing 6 module. Users should not use this module directly, but import the csv.py 7 module instead. 8 9 */ 10 11 #define MODULE_VERSION "1.0" 12 13 #include "Python.h" 14 #include "structmember.h" 15 16 17 typedef struct { 18 PyObject *error_obj; /* CSV exception */ 19 PyObject *dialects; /* Dialect registry */ 20 long field_limit; /* max parsed field size */ 21 } _csvstate; 22 23 #define _csvstate(o) ((_csvstate *)PyModule_GetState(o)) 24 25 static int 26 _csv_clear(PyObject *m) 27 { 28 Py_CLEAR(_csvstate(m)->error_obj); 29 Py_CLEAR(_csvstate(m)->dialects); 30 return 0; 31 } 32 33 static int 34 _csv_traverse(PyObject *m, visitproc visit, void *arg) 35 { 36 Py_VISIT(_csvstate(m)->error_obj); 37 Py_VISIT(_csvstate(m)->dialects); 38 return 0; 39 } 40 41 static void 42 _csv_free(void *m) 43 { 44 _csv_clear((PyObject *)m); 45 } 46 47 static struct PyModuleDef _csvmodule; 48 49 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule))) 50 51 typedef enum { 52 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 53 IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, 54 EAT_CRNL,AFTER_ESCAPED_CRNL 55 } ParserState; 56 57 typedef enum { 58 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE 59 } QuoteStyle; 60 61 typedef struct { 62 QuoteStyle style; 63 const char *name; 64 } StyleDesc; 65 66 static const StyleDesc quote_styles[] = { 67 { QUOTE_MINIMAL, "QUOTE_MINIMAL" }, 68 { QUOTE_ALL, "QUOTE_ALL" }, 69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, 70 { QUOTE_NONE, "QUOTE_NONE" }, 71 { 0 } 72 }; 73 74 typedef struct { 75 PyObject_HEAD 76 77 int doublequote; /* is " represented by ""? */ 78 Py_UCS4 delimiter; /* field separator */ 79 Py_UCS4 quotechar; /* quote character */ 80 Py_UCS4 escapechar; /* escape character */ 81 int skipinitialspace; /* ignore spaces following delimiter? */ 82 PyObject *lineterminator; /* string to write between records */ 83 int quoting; /* style of quoting to write */ 84 85 int strict; /* raise exception on bad CSV */ 86 } DialectObj; 87 88 static PyTypeObject Dialect_Type; 89 90 typedef struct { 91 PyObject_HEAD 92 93 PyObject *input_iter; /* iterate over this for input lines */ 94 95 DialectObj *dialect; /* parsing dialect */ 96 97 PyObject *fields; /* field list for current record */ 98 ParserState state; /* current CSV parse state */ 99 Py_UCS4 *field; /* temporary buffer */ 100 Py_ssize_t field_size; /* size of allocated buffer */ 101 Py_ssize_t field_len; /* length of current field */ 102 int numeric_field; /* treat field as numeric */ 103 unsigned long line_num; /* Source-file line number */ 104 } ReaderObj; 105 106 static PyTypeObject Reader_Type; 107 108 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type) 109 110 typedef struct { 111 PyObject_HEAD 112 113 PyObject *writeline; /* write output lines to this file */ 114 115 DialectObj *dialect; /* parsing dialect */ 116 117 Py_UCS4 *rec; /* buffer for parser.join */ 118 Py_ssize_t rec_size; /* size of allocated record */ 119 Py_ssize_t rec_len; /* length of record */ 120 int num_fields; /* number of fields in record */ 121 } WriterObj; 122 123 static PyTypeObject Writer_Type; 124 125 /* 126 * DIALECT class 127 */ 128 129 static PyObject * 130 get_dialect_from_registry(PyObject * name_obj) 131 { 132 PyObject *dialect_obj; 133 134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj); 135 if (dialect_obj == NULL) { 136 if (!PyErr_Occurred()) 137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); 138 } 139 else 140 Py_INCREF(dialect_obj); 141 return dialect_obj; 142 } 143 144 static PyObject * 145 get_string(PyObject *str) 146 { 147 Py_XINCREF(str); 148 return str; 149 } 150 151 static PyObject * 152 get_nullchar_as_None(Py_UCS4 c) 153 { 154 if (c == '\0') { 155 Py_INCREF(Py_None); 156 return Py_None; 157 } 158 else 159 return PyUnicode_FromOrdinal(c); 160 } 161 162 static PyObject * 163 Dialect_get_lineterminator(DialectObj *self) 164 { 165 return get_string(self->lineterminator); 166 } 167 168 static PyObject * 169 Dialect_get_delimiter(DialectObj *self) 170 { 171 return get_nullchar_as_None(self->delimiter); 172 } 173 174 static PyObject * 175 Dialect_get_escapechar(DialectObj *self) 176 { 177 return get_nullchar_as_None(self->escapechar); 178 } 179 180 static PyObject * 181 Dialect_get_quotechar(DialectObj *self) 182 { 183 return get_nullchar_as_None(self->quotechar); 184 } 185 186 static PyObject * 187 Dialect_get_quoting(DialectObj *self) 188 { 189 return PyLong_FromLong(self->quoting); 190 } 191 192 static int 193 _set_bool(const char *name, int *target, PyObject *src, int dflt) 194 { 195 if (src == NULL) 196 *target = dflt; 197 else { 198 int b = PyObject_IsTrue(src); 199 if (b < 0) 200 return -1; 201 *target = b; 202 } 203 return 0; 204 } 205 206 static int 207 _set_int(const char *name, int *target, PyObject *src, int dflt) 208 { 209 if (src == NULL) 210 *target = dflt; 211 else { 212 long value; 213 if (!PyLong_CheckExact(src)) { 214 PyErr_Format(PyExc_TypeError, 215 "\"%s\" must be an integer", name); 216 return -1; 217 } 218 value = PyLong_AsLong(src); 219 if (value == -1 && PyErr_Occurred()) 220 return -1; 221 #if SIZEOF_LONG > SIZEOF_INT 222 if (value > INT_MAX || value < INT_MIN) { 223 PyErr_Format(PyExc_ValueError, 224 "integer out of range for \"%s\"", name); 225 return -1; 226 } 227 #endif 228 *target = (int)value; 229 } 230 return 0; 231 } 232 233 static int 234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) 235 { 236 if (src == NULL) 237 *target = dflt; 238 else { 239 *target = '\0'; 240 if (src != Py_None) { 241 Py_ssize_t len; 242 if (!PyUnicode_Check(src)) { 243 PyErr_Format(PyExc_TypeError, 244 "\"%s\" must be string, not %.200s", name, 245 src->ob_type->tp_name); 246 return -1; 247 } 248 len = PyUnicode_GetLength(src); 249 if (len > 1) { 250 PyErr_Format(PyExc_TypeError, 251 "\"%s\" must be a 1-character string", 252 name); 253 return -1; 254 } 255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */ 256 if (len > 0) 257 *target = PyUnicode_READ_CHAR(src, 0); 258 } 259 } 260 return 0; 261 } 262 263 static int 264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) 265 { 266 if (src == NULL) 267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); 268 else { 269 if (src == Py_None) 270 *target = NULL; 271 else if (!PyUnicode_Check(src)) { 272 PyErr_Format(PyExc_TypeError, 273 "\"%s\" must be a string", name); 274 return -1; 275 } 276 else { 277 if (PyUnicode_READY(src) == -1) 278 return -1; 279 Py_INCREF(src); 280 Py_XSETREF(*target, src); 281 } 282 } 283 return 0; 284 } 285 286 static int 287 dialect_check_quoting(int quoting) 288 { 289 const StyleDesc *qs; 290 291 for (qs = quote_styles; qs->name; qs++) { 292 if ((int)qs->style == quoting) 293 return 0; 294 } 295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); 296 return -1; 297 } 298 299 #define D_OFF(x) offsetof(DialectObj, x) 300 301 static struct PyMemberDef Dialect_memberlist[] = { 302 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY }, 303 { "doublequote", T_INT, D_OFF(doublequote), READONLY }, 304 { "strict", T_INT, D_OFF(strict), READONLY }, 305 { NULL } 306 }; 307 308 static PyGetSetDef Dialect_getsetlist[] = { 309 { "delimiter", (getter)Dialect_get_delimiter}, 310 { "escapechar", (getter)Dialect_get_escapechar}, 311 { "lineterminator", (getter)Dialect_get_lineterminator}, 312 { "quotechar", (getter)Dialect_get_quotechar}, 313 { "quoting", (getter)Dialect_get_quoting}, 314 {NULL}, 315 }; 316 317 static void 318 Dialect_dealloc(DialectObj *self) 319 { 320 Py_XDECREF(self->lineterminator); 321 Py_TYPE(self)->tp_free((PyObject *)self); 322 } 323 324 static char *dialect_kws[] = { 325 "dialect", 326 "delimiter", 327 "doublequote", 328 "escapechar", 329 "lineterminator", 330 "quotechar", 331 "quoting", 332 "skipinitialspace", 333 "strict", 334 NULL 335 }; 336 337 static PyObject * 338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) 339 { 340 DialectObj *self; 341 PyObject *ret = NULL; 342 PyObject *dialect = NULL; 343 PyObject *delimiter = NULL; 344 PyObject *doublequote = NULL; 345 PyObject *escapechar = NULL; 346 PyObject *lineterminator = NULL; 347 PyObject *quotechar = NULL; 348 PyObject *quoting = NULL; 349 PyObject *skipinitialspace = NULL; 350 PyObject *strict = NULL; 351 352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, 353 "|OOOOOOOOO", dialect_kws, 354 &dialect, 355 &delimiter, 356 &doublequote, 357 &escapechar, 358 &lineterminator, 359 "echar, 360 "ing, 361 &skipinitialspace, 362 &strict)) 363 return NULL; 364 365 if (dialect != NULL) { 366 if (PyUnicode_Check(dialect)) { 367 dialect = get_dialect_from_registry(dialect); 368 if (dialect == NULL) 369 return NULL; 370 } 371 else 372 Py_INCREF(dialect); 373 /* Can we reuse this instance? */ 374 if (PyObject_TypeCheck(dialect, &Dialect_Type) && 375 delimiter == 0 && 376 doublequote == 0 && 377 escapechar == 0 && 378 lineterminator == 0 && 379 quotechar == 0 && 380 quoting == 0 && 381 skipinitialspace == 0 && 382 strict == 0) 383 return dialect; 384 } 385 386 self = (DialectObj *)type->tp_alloc(type, 0); 387 if (self == NULL) { 388 Py_XDECREF(dialect); 389 return NULL; 390 } 391 self->lineterminator = NULL; 392 393 Py_XINCREF(delimiter); 394 Py_XINCREF(doublequote); 395 Py_XINCREF(escapechar); 396 Py_XINCREF(lineterminator); 397 Py_XINCREF(quotechar); 398 Py_XINCREF(quoting); 399 Py_XINCREF(skipinitialspace); 400 Py_XINCREF(strict); 401 if (dialect != NULL) { 402 #define DIALECT_GETATTR(v, n) \ 403 if (v == NULL) \ 404 v = PyObject_GetAttrString(dialect, n) 405 DIALECT_GETATTR(delimiter, "delimiter"); 406 DIALECT_GETATTR(doublequote, "doublequote"); 407 DIALECT_GETATTR(escapechar, "escapechar"); 408 DIALECT_GETATTR(lineterminator, "lineterminator"); 409 DIALECT_GETATTR(quotechar, "quotechar"); 410 DIALECT_GETATTR(quoting, "quoting"); 411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); 412 DIALECT_GETATTR(strict, "strict"); 413 PyErr_Clear(); 414 } 415 416 /* check types and convert to C values */ 417 #define DIASET(meth, name, target, src, dflt) \ 418 if (meth(name, target, src, dflt)) \ 419 goto err 420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); 421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); 422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); 423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); 424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); 425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); 426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); 427 DIASET(_set_bool, "strict", &self->strict, strict, 0); 428 429 /* validate options */ 430 if (dialect_check_quoting(self->quoting)) 431 goto err; 432 if (self->delimiter == 0) { 433 PyErr_SetString(PyExc_TypeError, 434 "\"delimiter\" must be a 1-character string"); 435 goto err; 436 } 437 if (quotechar == Py_None && quoting == NULL) 438 self->quoting = QUOTE_NONE; 439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) { 440 PyErr_SetString(PyExc_TypeError, 441 "quotechar must be set if quoting enabled"); 442 goto err; 443 } 444 if (self->lineterminator == 0) { 445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); 446 goto err; 447 } 448 449 ret = (PyObject *)self; 450 Py_INCREF(self); 451 err: 452 Py_XDECREF(self); 453 Py_XDECREF(dialect); 454 Py_XDECREF(delimiter); 455 Py_XDECREF(doublequote); 456 Py_XDECREF(escapechar); 457 Py_XDECREF(lineterminator); 458 Py_XDECREF(quotechar); 459 Py_XDECREF(quoting); 460 Py_XDECREF(skipinitialspace); 461 Py_XDECREF(strict); 462 return ret; 463 } 464 465 466 PyDoc_STRVAR(Dialect_Type_doc, 467 "CSV dialect\n" 468 "\n" 469 "The Dialect type records CSV parsing and generation options.\n"); 470 471 static PyTypeObject Dialect_Type = { 472 PyVarObject_HEAD_INIT(NULL, 0) 473 "_csv.Dialect", /* tp_name */ 474 sizeof(DialectObj), /* tp_basicsize */ 475 0, /* tp_itemsize */ 476 /* methods */ 477 (destructor)Dialect_dealloc, /* tp_dealloc */ 478 (printfunc)0, /* tp_print */ 479 (getattrfunc)0, /* tp_getattr */ 480 (setattrfunc)0, /* tp_setattr */ 481 0, /* tp_reserved */ 482 (reprfunc)0, /* tp_repr */ 483 0, /* tp_as_number */ 484 0, /* tp_as_sequence */ 485 0, /* tp_as_mapping */ 486 (hashfunc)0, /* tp_hash */ 487 (ternaryfunc)0, /* tp_call */ 488 (reprfunc)0, /* tp_str */ 489 0, /* tp_getattro */ 490 0, /* tp_setattro */ 491 0, /* tp_as_buffer */ 492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ 493 Dialect_Type_doc, /* tp_doc */ 494 0, /* tp_traverse */ 495 0, /* tp_clear */ 496 0, /* tp_richcompare */ 497 0, /* tp_weaklistoffset */ 498 0, /* tp_iter */ 499 0, /* tp_iternext */ 500 0, /* tp_methods */ 501 Dialect_memberlist, /* tp_members */ 502 Dialect_getsetlist, /* tp_getset */ 503 0, /* tp_base */ 504 0, /* tp_dict */ 505 0, /* tp_descr_get */ 506 0, /* tp_descr_set */ 507 0, /* tp_dictoffset */ 508 0, /* tp_init */ 509 0, /* tp_alloc */ 510 dialect_new, /* tp_new */ 511 0, /* tp_free */ 512 }; 513 514 /* 515 * Return an instance of the dialect type, given a Python instance or kwarg 516 * description of the dialect 517 */ 518 static PyObject * 519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs) 520 { 521 PyObject *type = (PyObject *)&Dialect_Type; 522 if (dialect_inst) { 523 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs); 524 } 525 else { 526 return _PyObject_FastCallDict(type, NULL, 0, kwargs); 527 } 528 } 529 530 /* 531 * READER 532 */ 533 static int 534 parse_save_field(ReaderObj *self) 535 { 536 PyObject *field; 537 538 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 539 (void *) self->field, self->field_len); 540 if (field == NULL) 541 return -1; 542 self->field_len = 0; 543 if (self->numeric_field) { 544 PyObject *tmp; 545 546 self->numeric_field = 0; 547 tmp = PyNumber_Float(field); 548 Py_DECREF(field); 549 if (tmp == NULL) 550 return -1; 551 field = tmp; 552 } 553 if (PyList_Append(self->fields, field) < 0) { 554 Py_DECREF(field); 555 return -1; 556 } 557 Py_DECREF(field); 558 return 0; 559 } 560 561 static int 562 parse_grow_buff(ReaderObj *self) 563 { 564 if (self->field_size == 0) { 565 self->field_size = 4096; 566 if (self->field != NULL) 567 PyMem_Free(self->field); 568 self->field = PyMem_New(Py_UCS4, self->field_size); 569 } 570 else { 571 Py_UCS4 *field = self->field; 572 if (self->field_size > PY_SSIZE_T_MAX / 2) { 573 PyErr_NoMemory(); 574 return 0; 575 } 576 self->field_size *= 2; 577 self->field = PyMem_Resize(field, Py_UCS4, self->field_size); 578 } 579 if (self->field == NULL) { 580 PyErr_NoMemory(); 581 return 0; 582 } 583 return 1; 584 } 585 586 static int 587 parse_add_char(ReaderObj *self, Py_UCS4 c) 588 { 589 if (self->field_len >= _csvstate_global->field_limit) { 590 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)", 591 _csvstate_global->field_limit); 592 return -1; 593 } 594 if (self->field_len == self->field_size && !parse_grow_buff(self)) 595 return -1; 596 self->field[self->field_len++] = c; 597 return 0; 598 } 599 600 static int 601 parse_process_char(ReaderObj *self, Py_UCS4 c) 602 { 603 DialectObj *dialect = self->dialect; 604 605 switch (self->state) { 606 case START_RECORD: 607 /* start of record */ 608 if (c == '\0') 609 /* empty line - return [] */ 610 break; 611 else if (c == '\n' || c == '\r') { 612 self->state = EAT_CRNL; 613 break; 614 } 615 /* normal character - handle as START_FIELD */ 616 self->state = START_FIELD; 617 /* fallthru */ 618 case START_FIELD: 619 /* expecting field */ 620 if (c == '\n' || c == '\r' || c == '\0') { 621 /* save empty field - return [fields] */ 622 if (parse_save_field(self) < 0) 623 return -1; 624 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 625 } 626 else if (c == dialect->quotechar && 627 dialect->quoting != QUOTE_NONE) { 628 /* start quoted field */ 629 self->state = IN_QUOTED_FIELD; 630 } 631 else if (c == dialect->escapechar) { 632 /* possible escaped character */ 633 self->state = ESCAPED_CHAR; 634 } 635 else if (c == ' ' && dialect->skipinitialspace) 636 /* ignore space at start of field */ 637 ; 638 else if (c == dialect->delimiter) { 639 /* save empty field */ 640 if (parse_save_field(self) < 0) 641 return -1; 642 } 643 else { 644 /* begin new unquoted field */ 645 if (dialect->quoting == QUOTE_NONNUMERIC) 646 self->numeric_field = 1; 647 if (parse_add_char(self, c) < 0) 648 return -1; 649 self->state = IN_FIELD; 650 } 651 break; 652 653 case ESCAPED_CHAR: 654 if (c == '\n' || c=='\r') { 655 if (parse_add_char(self, c) < 0) 656 return -1; 657 self->state = AFTER_ESCAPED_CRNL; 658 break; 659 } 660 if (c == '\0') 661 c = '\n'; 662 if (parse_add_char(self, c) < 0) 663 return -1; 664 self->state = IN_FIELD; 665 break; 666 667 case AFTER_ESCAPED_CRNL: 668 if (c == '\0') 669 break; 670 /*fallthru*/ 671 672 case IN_FIELD: 673 /* in unquoted field */ 674 if (c == '\n' || c == '\r' || c == '\0') { 675 /* end of line - return [fields] */ 676 if (parse_save_field(self) < 0) 677 return -1; 678 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 679 } 680 else if (c == dialect->escapechar) { 681 /* possible escaped character */ 682 self->state = ESCAPED_CHAR; 683 } 684 else if (c == dialect->delimiter) { 685 /* save field - wait for new field */ 686 if (parse_save_field(self) < 0) 687 return -1; 688 self->state = START_FIELD; 689 } 690 else { 691 /* normal character - save in field */ 692 if (parse_add_char(self, c) < 0) 693 return -1; 694 } 695 break; 696 697 case IN_QUOTED_FIELD: 698 /* in quoted field */ 699 if (c == '\0') 700 ; 701 else if (c == dialect->escapechar) { 702 /* Possible escape character */ 703 self->state = ESCAPE_IN_QUOTED_FIELD; 704 } 705 else if (c == dialect->quotechar && 706 dialect->quoting != QUOTE_NONE) { 707 if (dialect->doublequote) { 708 /* doublequote; " represented by "" */ 709 self->state = QUOTE_IN_QUOTED_FIELD; 710 } 711 else { 712 /* end of quote part of field */ 713 self->state = IN_FIELD; 714 } 715 } 716 else { 717 /* normal character - save in field */ 718 if (parse_add_char(self, c) < 0) 719 return -1; 720 } 721 break; 722 723 case ESCAPE_IN_QUOTED_FIELD: 724 if (c == '\0') 725 c = '\n'; 726 if (parse_add_char(self, c) < 0) 727 return -1; 728 self->state = IN_QUOTED_FIELD; 729 break; 730 731 case QUOTE_IN_QUOTED_FIELD: 732 /* doublequote - seen a quote in a quoted field */ 733 if (dialect->quoting != QUOTE_NONE && 734 c == dialect->quotechar) { 735 /* save "" as " */ 736 if (parse_add_char(self, c) < 0) 737 return -1; 738 self->state = IN_QUOTED_FIELD; 739 } 740 else if (c == dialect->delimiter) { 741 /* save field - wait for new field */ 742 if (parse_save_field(self) < 0) 743 return -1; 744 self->state = START_FIELD; 745 } 746 else if (c == '\n' || c == '\r' || c == '\0') { 747 /* end of line - return [fields] */ 748 if (parse_save_field(self) < 0) 749 return -1; 750 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 751 } 752 else if (!dialect->strict) { 753 if (parse_add_char(self, c) < 0) 754 return -1; 755 self->state = IN_FIELD; 756 } 757 else { 758 /* illegal */ 759 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'", 760 dialect->delimiter, 761 dialect->quotechar); 762 return -1; 763 } 764 break; 765 766 case EAT_CRNL: 767 if (c == '\n' || c == '\r') 768 ; 769 else if (c == '\0') 770 self->state = START_RECORD; 771 else { 772 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); 773 return -1; 774 } 775 break; 776 777 } 778 return 0; 779 } 780 781 static int 782 parse_reset(ReaderObj *self) 783 { 784 Py_XSETREF(self->fields, PyList_New(0)); 785 if (self->fields == NULL) 786 return -1; 787 self->field_len = 0; 788 self->state = START_RECORD; 789 self->numeric_field = 0; 790 return 0; 791 } 792 793 static PyObject * 794 Reader_iternext(ReaderObj *self) 795 { 796 PyObject *fields = NULL; 797 Py_UCS4 c; 798 Py_ssize_t pos, linelen; 799 unsigned int kind; 800 void *data; 801 PyObject *lineobj; 802 803 if (parse_reset(self) < 0) 804 return NULL; 805 do { 806 lineobj = PyIter_Next(self->input_iter); 807 if (lineobj == NULL) { 808 /* End of input OR exception */ 809 if (!PyErr_Occurred() && (self->field_len != 0 || 810 self->state == IN_QUOTED_FIELD)) { 811 if (self->dialect->strict) 812 PyErr_SetString(_csvstate_global->error_obj, 813 "unexpected end of data"); 814 else if (parse_save_field(self) >= 0) 815 break; 816 } 817 return NULL; 818 } 819 if (!PyUnicode_Check(lineobj)) { 820 PyErr_Format(_csvstate_global->error_obj, 821 "iterator should return strings, " 822 "not %.200s " 823 "(did you open the file in text mode?)", 824 lineobj->ob_type->tp_name 825 ); 826 Py_DECREF(lineobj); 827 return NULL; 828 } 829 if (PyUnicode_READY(lineobj) == -1) { 830 Py_DECREF(lineobj); 831 return NULL; 832 } 833 ++self->line_num; 834 kind = PyUnicode_KIND(lineobj); 835 data = PyUnicode_DATA(lineobj); 836 pos = 0; 837 linelen = PyUnicode_GET_LENGTH(lineobj); 838 while (linelen--) { 839 c = PyUnicode_READ(kind, data, pos); 840 if (c == '\0') { 841 Py_DECREF(lineobj); 842 PyErr_Format(_csvstate_global->error_obj, 843 "line contains NULL byte"); 844 goto err; 845 } 846 if (parse_process_char(self, c) < 0) { 847 Py_DECREF(lineobj); 848 goto err; 849 } 850 pos++; 851 } 852 Py_DECREF(lineobj); 853 if (parse_process_char(self, 0) < 0) 854 goto err; 855 } while (self->state != START_RECORD); 856 857 fields = self->fields; 858 self->fields = NULL; 859 err: 860 return fields; 861 } 862 863 static void 864 Reader_dealloc(ReaderObj *self) 865 { 866 PyObject_GC_UnTrack(self); 867 Py_XDECREF(self->dialect); 868 Py_XDECREF(self->input_iter); 869 Py_XDECREF(self->fields); 870 if (self->field != NULL) 871 PyMem_Free(self->field); 872 PyObject_GC_Del(self); 873 } 874 875 static int 876 Reader_traverse(ReaderObj *self, visitproc visit, void *arg) 877 { 878 Py_VISIT(self->dialect); 879 Py_VISIT(self->input_iter); 880 Py_VISIT(self->fields); 881 return 0; 882 } 883 884 static int 885 Reader_clear(ReaderObj *self) 886 { 887 Py_CLEAR(self->dialect); 888 Py_CLEAR(self->input_iter); 889 Py_CLEAR(self->fields); 890 return 0; 891 } 892 893 PyDoc_STRVAR(Reader_Type_doc, 894 "CSV reader\n" 895 "\n" 896 "Reader objects are responsible for reading and parsing tabular data\n" 897 "in CSV format.\n" 898 ); 899 900 static struct PyMethodDef Reader_methods[] = { 901 { NULL, NULL } 902 }; 903 #define R_OFF(x) offsetof(ReaderObj, x) 904 905 static struct PyMemberDef Reader_memberlist[] = { 906 { "dialect", T_OBJECT, R_OFF(dialect), READONLY }, 907 { "line_num", T_ULONG, R_OFF(line_num), READONLY }, 908 { NULL } 909 }; 910 911 912 static PyTypeObject Reader_Type = { 913 PyVarObject_HEAD_INIT(NULL, 0) 914 "_csv.reader", /*tp_name*/ 915 sizeof(ReaderObj), /*tp_basicsize*/ 916 0, /*tp_itemsize*/ 917 /* methods */ 918 (destructor)Reader_dealloc, /*tp_dealloc*/ 919 (printfunc)0, /*tp_print*/ 920 (getattrfunc)0, /*tp_getattr*/ 921 (setattrfunc)0, /*tp_setattr*/ 922 0, /*tp_reserved*/ 923 (reprfunc)0, /*tp_repr*/ 924 0, /*tp_as_number*/ 925 0, /*tp_as_sequence*/ 926 0, /*tp_as_mapping*/ 927 (hashfunc)0, /*tp_hash*/ 928 (ternaryfunc)0, /*tp_call*/ 929 (reprfunc)0, /*tp_str*/ 930 0, /*tp_getattro*/ 931 0, /*tp_setattro*/ 932 0, /*tp_as_buffer*/ 933 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 934 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 935 Reader_Type_doc, /*tp_doc*/ 936 (traverseproc)Reader_traverse, /*tp_traverse*/ 937 (inquiry)Reader_clear, /*tp_clear*/ 938 0, /*tp_richcompare*/ 939 0, /*tp_weaklistoffset*/ 940 PyObject_SelfIter, /*tp_iter*/ 941 (getiterfunc)Reader_iternext, /*tp_iternext*/ 942 Reader_methods, /*tp_methods*/ 943 Reader_memberlist, /*tp_members*/ 944 0, /*tp_getset*/ 945 946 }; 947 948 static PyObject * 949 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) 950 { 951 PyObject * iterator, * dialect = NULL; 952 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); 953 954 if (!self) 955 return NULL; 956 957 self->dialect = NULL; 958 self->fields = NULL; 959 self->input_iter = NULL; 960 self->field = NULL; 961 self->field_size = 0; 962 self->line_num = 0; 963 964 if (parse_reset(self) < 0) { 965 Py_DECREF(self); 966 return NULL; 967 } 968 969 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { 970 Py_DECREF(self); 971 return NULL; 972 } 973 self->input_iter = PyObject_GetIter(iterator); 974 if (self->input_iter == NULL) { 975 PyErr_SetString(PyExc_TypeError, 976 "argument 1 must be an iterator"); 977 Py_DECREF(self); 978 return NULL; 979 } 980 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); 981 if (self->dialect == NULL) { 982 Py_DECREF(self); 983 return NULL; 984 } 985 986 PyObject_GC_Track(self); 987 return (PyObject *)self; 988 } 989 990 /* 991 * WRITER 992 */ 993 /* ---------------------------------------------------------------- */ 994 static void 995 join_reset(WriterObj *self) 996 { 997 self->rec_len = 0; 998 self->num_fields = 0; 999 } 1000 1001 #define MEM_INCR 32768 1002 1003 /* Calculate new record length or append field to record. Return new 1004 * record length. 1005 */ 1006 static Py_ssize_t 1007 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, 1008 Py_ssize_t field_len, int *quoted, 1009 int copy_phase) 1010 { 1011 DialectObj *dialect = self->dialect; 1012 int i; 1013 Py_ssize_t rec_len; 1014 1015 #define INCLEN \ 1016 do {\ 1017 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \ 1018 goto overflow; \ 1019 } \ 1020 rec_len++; \ 1021 } while(0) 1022 1023 #define ADDCH(c) \ 1024 do {\ 1025 if (copy_phase) \ 1026 self->rec[rec_len] = c;\ 1027 INCLEN;\ 1028 } while(0) 1029 1030 rec_len = self->rec_len; 1031 1032 /* If this is not the first field we need a field separator */ 1033 if (self->num_fields > 0) 1034 ADDCH(dialect->delimiter); 1035 1036 /* Handle preceding quote */ 1037 if (copy_phase && *quoted) 1038 ADDCH(dialect->quotechar); 1039 1040 /* Copy/count field data */ 1041 /* If field is null just pass over */ 1042 for (i = 0; field_data && (i < field_len); i++) { 1043 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i); 1044 int want_escape = 0; 1045 1046 if (c == dialect->delimiter || 1047 c == dialect->escapechar || 1048 c == dialect->quotechar || 1049 PyUnicode_FindChar( 1050 dialect->lineterminator, c, 0, 1051 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { 1052 if (dialect->quoting == QUOTE_NONE) 1053 want_escape = 1; 1054 else { 1055 if (c == dialect->quotechar) { 1056 if (dialect->doublequote) 1057 ADDCH(dialect->quotechar); 1058 else 1059 want_escape = 1; 1060 } 1061 if (!want_escape) 1062 *quoted = 1; 1063 } 1064 if (want_escape) { 1065 if (!dialect->escapechar) { 1066 PyErr_Format(_csvstate_global->error_obj, 1067 "need to escape, but no escapechar set"); 1068 return -1; 1069 } 1070 ADDCH(dialect->escapechar); 1071 } 1072 } 1073 /* Copy field character into record buffer. 1074 */ 1075 ADDCH(c); 1076 } 1077 1078 if (*quoted) { 1079 if (copy_phase) 1080 ADDCH(dialect->quotechar); 1081 else { 1082 INCLEN; /* starting quote */ 1083 INCLEN; /* ending quote */ 1084 } 1085 } 1086 return rec_len; 1087 1088 overflow: 1089 PyErr_NoMemory(); 1090 return -1; 1091 #undef ADDCH 1092 #undef INCLEN 1093 } 1094 1095 static int 1096 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) 1097 { 1098 1099 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) { 1100 PyErr_NoMemory(); 1101 return 0; 1102 } 1103 1104 if (rec_len > self->rec_size) { 1105 if (self->rec_size == 0) { 1106 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; 1107 if (self->rec != NULL) 1108 PyMem_Free(self->rec); 1109 self->rec = PyMem_New(Py_UCS4, self->rec_size); 1110 } 1111 else { 1112 Py_UCS4* old_rec = self->rec; 1113 1114 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; 1115 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size); 1116 if (self->rec == NULL) 1117 PyMem_Free(old_rec); 1118 } 1119 if (self->rec == NULL) { 1120 PyErr_NoMemory(); 1121 return 0; 1122 } 1123 } 1124 return 1; 1125 } 1126 1127 static int 1128 join_append(WriterObj *self, PyObject *field, int quoted) 1129 { 1130 unsigned int field_kind = -1; 1131 void *field_data = NULL; 1132 Py_ssize_t field_len = 0; 1133 Py_ssize_t rec_len; 1134 1135 if (field != NULL) { 1136 if (PyUnicode_READY(field) == -1) 1137 return 0; 1138 field_kind = PyUnicode_KIND(field); 1139 field_data = PyUnicode_DATA(field); 1140 field_len = PyUnicode_GET_LENGTH(field); 1141 } 1142 rec_len = join_append_data(self, field_kind, field_data, field_len, 1143 "ed, 0); 1144 if (rec_len < 0) 1145 return 0; 1146 1147 /* grow record buffer if necessary */ 1148 if (!join_check_rec_size(self, rec_len)) 1149 return 0; 1150 1151 self->rec_len = join_append_data(self, field_kind, field_data, field_len, 1152 "ed, 1); 1153 self->num_fields++; 1154 1155 return 1; 1156 } 1157 1158 static int 1159 join_append_lineterminator(WriterObj *self) 1160 { 1161 Py_ssize_t terminator_len, i; 1162 unsigned int term_kind; 1163 void *term_data; 1164 1165 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); 1166 if (terminator_len == -1) 1167 return 0; 1168 1169 /* grow record buffer if necessary */ 1170 if (!join_check_rec_size(self, self->rec_len + terminator_len)) 1171 return 0; 1172 1173 term_kind = PyUnicode_KIND(self->dialect->lineterminator); 1174 term_data = PyUnicode_DATA(self->dialect->lineterminator); 1175 for (i = 0; i < terminator_len; i++) 1176 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); 1177 self->rec_len += terminator_len; 1178 1179 return 1; 1180 } 1181 1182 PyDoc_STRVAR(csv_writerow_doc, 1183 "writerow(iterable)\n" 1184 "\n" 1185 "Construct and write a CSV record from an iterable of fields. Non-string\n" 1186 "elements will be converted to string."); 1187 1188 static PyObject * 1189 csv_writerow(WriterObj *self, PyObject *seq) 1190 { 1191 DialectObj *dialect = self->dialect; 1192 PyObject *iter, *field, *line, *result; 1193 1194 iter = PyObject_GetIter(seq); 1195 if (iter == NULL) 1196 return PyErr_Format(_csvstate_global->error_obj, 1197 "iterable expected, not %.200s", 1198 seq->ob_type->tp_name); 1199 1200 /* Join all fields in internal buffer. 1201 */ 1202 join_reset(self); 1203 while ((field = PyIter_Next(iter))) { 1204 int append_ok; 1205 int quoted; 1206 1207 switch (dialect->quoting) { 1208 case QUOTE_NONNUMERIC: 1209 quoted = !PyNumber_Check(field); 1210 break; 1211 case QUOTE_ALL: 1212 quoted = 1; 1213 break; 1214 default: 1215 quoted = 0; 1216 break; 1217 } 1218 1219 if (PyUnicode_Check(field)) { 1220 append_ok = join_append(self, field, quoted); 1221 Py_DECREF(field); 1222 } 1223 else if (field == Py_None) { 1224 append_ok = join_append(self, NULL, quoted); 1225 Py_DECREF(field); 1226 } 1227 else { 1228 PyObject *str; 1229 1230 str = PyObject_Str(field); 1231 Py_DECREF(field); 1232 if (str == NULL) { 1233 Py_DECREF(iter); 1234 return NULL; 1235 } 1236 append_ok = join_append(self, str, quoted); 1237 Py_DECREF(str); 1238 } 1239 if (!append_ok) { 1240 Py_DECREF(iter); 1241 return NULL; 1242 } 1243 } 1244 Py_DECREF(iter); 1245 if (PyErr_Occurred()) 1246 return NULL; 1247 1248 if (self->num_fields > 0 && self->rec_size == 0) { 1249 if (dialect->quoting == QUOTE_NONE) { 1250 PyErr_Format(_csvstate_global->error_obj, 1251 "single empty field record must be quoted"); 1252 return NULL; 1253 } 1254 self->num_fields--; 1255 if (!join_append(self, NULL, 1)) 1256 return NULL; 1257 } 1258 1259 /* Add line terminator. 1260 */ 1261 if (!join_append_lineterminator(self)) 1262 return NULL; 1263 1264 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 1265 (void *) self->rec, self->rec_len); 1266 if (line == NULL) 1267 return NULL; 1268 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL); 1269 Py_DECREF(line); 1270 return result; 1271 } 1272 1273 PyDoc_STRVAR(csv_writerows_doc, 1274 "writerows(iterable of iterables)\n" 1275 "\n" 1276 "Construct and write a series of iterables to a csv file. Non-string\n" 1277 "elements will be converted to string."); 1278 1279 static PyObject * 1280 csv_writerows(WriterObj *self, PyObject *seqseq) 1281 { 1282 PyObject *row_iter, *row_obj, *result; 1283 1284 row_iter = PyObject_GetIter(seqseq); 1285 if (row_iter == NULL) { 1286 PyErr_SetString(PyExc_TypeError, 1287 "writerows() argument must be iterable"); 1288 return NULL; 1289 } 1290 while ((row_obj = PyIter_Next(row_iter))) { 1291 result = csv_writerow(self, row_obj); 1292 Py_DECREF(row_obj); 1293 if (!result) { 1294 Py_DECREF(row_iter); 1295 return NULL; 1296 } 1297 else 1298 Py_DECREF(result); 1299 } 1300 Py_DECREF(row_iter); 1301 if (PyErr_Occurred()) 1302 return NULL; 1303 Py_INCREF(Py_None); 1304 return Py_None; 1305 } 1306 1307 static struct PyMethodDef Writer_methods[] = { 1308 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, 1309 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, 1310 { NULL, NULL } 1311 }; 1312 1313 #define W_OFF(x) offsetof(WriterObj, x) 1314 1315 static struct PyMemberDef Writer_memberlist[] = { 1316 { "dialect", T_OBJECT, W_OFF(dialect), READONLY }, 1317 { NULL } 1318 }; 1319 1320 static void 1321 Writer_dealloc(WriterObj *self) 1322 { 1323 PyObject_GC_UnTrack(self); 1324 Py_XDECREF(self->dialect); 1325 Py_XDECREF(self->writeline); 1326 if (self->rec != NULL) 1327 PyMem_Free(self->rec); 1328 PyObject_GC_Del(self); 1329 } 1330 1331 static int 1332 Writer_traverse(WriterObj *self, visitproc visit, void *arg) 1333 { 1334 Py_VISIT(self->dialect); 1335 Py_VISIT(self->writeline); 1336 return 0; 1337 } 1338 1339 static int 1340 Writer_clear(WriterObj *self) 1341 { 1342 Py_CLEAR(self->dialect); 1343 Py_CLEAR(self->writeline); 1344 return 0; 1345 } 1346 1347 PyDoc_STRVAR(Writer_Type_doc, 1348 "CSV writer\n" 1349 "\n" 1350 "Writer objects are responsible for generating tabular data\n" 1351 "in CSV format from sequence input.\n" 1352 ); 1353 1354 static PyTypeObject Writer_Type = { 1355 PyVarObject_HEAD_INIT(NULL, 0) 1356 "_csv.writer", /*tp_name*/ 1357 sizeof(WriterObj), /*tp_basicsize*/ 1358 0, /*tp_itemsize*/ 1359 /* methods */ 1360 (destructor)Writer_dealloc, /*tp_dealloc*/ 1361 (printfunc)0, /*tp_print*/ 1362 (getattrfunc)0, /*tp_getattr*/ 1363 (setattrfunc)0, /*tp_setattr*/ 1364 0, /*tp_reserved*/ 1365 (reprfunc)0, /*tp_repr*/ 1366 0, /*tp_as_number*/ 1367 0, /*tp_as_sequence*/ 1368 0, /*tp_as_mapping*/ 1369 (hashfunc)0, /*tp_hash*/ 1370 (ternaryfunc)0, /*tp_call*/ 1371 (reprfunc)0, /*tp_str*/ 1372 0, /*tp_getattro*/ 1373 0, /*tp_setattro*/ 1374 0, /*tp_as_buffer*/ 1375 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 1376 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 1377 Writer_Type_doc, 1378 (traverseproc)Writer_traverse, /*tp_traverse*/ 1379 (inquiry)Writer_clear, /*tp_clear*/ 1380 0, /*tp_richcompare*/ 1381 0, /*tp_weaklistoffset*/ 1382 (getiterfunc)0, /*tp_iter*/ 1383 (getiterfunc)0, /*tp_iternext*/ 1384 Writer_methods, /*tp_methods*/ 1385 Writer_memberlist, /*tp_members*/ 1386 0, /*tp_getset*/ 1387 }; 1388 1389 static PyObject * 1390 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) 1391 { 1392 PyObject * output_file, * dialect = NULL; 1393 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); 1394 _Py_IDENTIFIER(write); 1395 1396 if (!self) 1397 return NULL; 1398 1399 self->dialect = NULL; 1400 self->writeline = NULL; 1401 1402 self->rec = NULL; 1403 self->rec_size = 0; 1404 self->rec_len = 0; 1405 self->num_fields = 0; 1406 1407 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { 1408 Py_DECREF(self); 1409 return NULL; 1410 } 1411 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write); 1412 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { 1413 PyErr_SetString(PyExc_TypeError, 1414 "argument 1 must have a \"write\" method"); 1415 Py_DECREF(self); 1416 return NULL; 1417 } 1418 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); 1419 if (self->dialect == NULL) { 1420 Py_DECREF(self); 1421 return NULL; 1422 } 1423 PyObject_GC_Track(self); 1424 return (PyObject *)self; 1425 } 1426 1427 /* 1428 * DIALECT REGISTRY 1429 */ 1430 static PyObject * 1431 csv_list_dialects(PyObject *module, PyObject *args) 1432 { 1433 return PyDict_Keys(_csvstate_global->dialects); 1434 } 1435 1436 static PyObject * 1437 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) 1438 { 1439 PyObject *name_obj, *dialect_obj = NULL; 1440 PyObject *dialect; 1441 1442 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) 1443 return NULL; 1444 if (!PyUnicode_Check(name_obj)) { 1445 PyErr_SetString(PyExc_TypeError, 1446 "dialect name must be a string"); 1447 return NULL; 1448 } 1449 if (PyUnicode_READY(name_obj) == -1) 1450 return NULL; 1451 dialect = _call_dialect(dialect_obj, kwargs); 1452 if (dialect == NULL) 1453 return NULL; 1454 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) { 1455 Py_DECREF(dialect); 1456 return NULL; 1457 } 1458 Py_DECREF(dialect); 1459 Py_INCREF(Py_None); 1460 return Py_None; 1461 } 1462 1463 static PyObject * 1464 csv_unregister_dialect(PyObject *module, PyObject *name_obj) 1465 { 1466 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) 1467 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); 1468 Py_INCREF(Py_None); 1469 return Py_None; 1470 } 1471 1472 static PyObject * 1473 csv_get_dialect(PyObject *module, PyObject *name_obj) 1474 { 1475 return get_dialect_from_registry(name_obj); 1476 } 1477 1478 static PyObject * 1479 csv_field_size_limit(PyObject *module, PyObject *args) 1480 { 1481 PyObject *new_limit = NULL; 1482 long old_limit = _csvstate_global->field_limit; 1483 1484 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) 1485 return NULL; 1486 if (new_limit != NULL) { 1487 if (!PyLong_CheckExact(new_limit)) { 1488 PyErr_Format(PyExc_TypeError, 1489 "limit must be an integer"); 1490 return NULL; 1491 } 1492 _csvstate_global->field_limit = PyLong_AsLong(new_limit); 1493 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) { 1494 _csvstate_global->field_limit = old_limit; 1495 return NULL; 1496 } 1497 } 1498 return PyLong_FromLong(old_limit); 1499 } 1500 1501 /* 1502 * MODULE 1503 */ 1504 1505 PyDoc_STRVAR(csv_module_doc, 1506 "CSV parsing and writing.\n" 1507 "\n" 1508 "This module provides classes that assist in the reading and writing\n" 1509 "of Comma Separated Value (CSV) files, and implements the interface\n" 1510 "described by PEP 305. Although many CSV files are simple to parse,\n" 1511 "the format is not formally defined by a stable specification and\n" 1512 "is subtle enough that parsing lines of a CSV file with something\n" 1513 "like line.split(\",\") is bound to fail. The module supports three\n" 1514 "basic APIs: reading, writing, and registration of dialects.\n" 1515 "\n" 1516 "\n" 1517 "DIALECT REGISTRATION:\n" 1518 "\n" 1519 "Readers and writers support a dialect argument, which is a convenient\n" 1520 "handle on a group of settings. When the dialect argument is a string,\n" 1521 "it identifies one of the dialects previously registered with the module.\n" 1522 "If it is a class or instance, the attributes of the argument are used as\n" 1523 "the settings for the reader or writer:\n" 1524 "\n" 1525 " class excel:\n" 1526 " delimiter = ','\n" 1527 " quotechar = '\"'\n" 1528 " escapechar = None\n" 1529 " doublequote = True\n" 1530 " skipinitialspace = False\n" 1531 " lineterminator = '\\r\\n'\n" 1532 " quoting = QUOTE_MINIMAL\n" 1533 "\n" 1534 "SETTINGS:\n" 1535 "\n" 1536 " * quotechar - specifies a one-character string to use as the \n" 1537 " quoting character. It defaults to '\"'.\n" 1538 " * delimiter - specifies a one-character string to use as the \n" 1539 " field separator. It defaults to ','.\n" 1540 " * skipinitialspace - specifies how to interpret whitespace which\n" 1541 " immediately follows a delimiter. It defaults to False, which\n" 1542 " means that whitespace immediately following a delimiter is part\n" 1543 " of the following field.\n" 1544 " * lineterminator - specifies the character sequence which should \n" 1545 " terminate rows.\n" 1546 " * quoting - controls when quotes should be generated by the writer.\n" 1547 " It can take on any of the following module constants:\n" 1548 "\n" 1549 " csv.QUOTE_MINIMAL means only when required, for example, when a\n" 1550 " field contains either the quotechar or the delimiter\n" 1551 " csv.QUOTE_ALL means that quotes are always placed around fields.\n" 1552 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" 1553 " fields which do not parse as integers or floating point\n" 1554 " numbers.\n" 1555 " csv.QUOTE_NONE means that quotes are never placed around fields.\n" 1556 " * escapechar - specifies a one-character string used to escape \n" 1557 " the delimiter when quoting is set to QUOTE_NONE.\n" 1558 " * doublequote - controls the handling of quotes inside fields. When\n" 1559 " True, two consecutive quotes are interpreted as one during read,\n" 1560 " and when writing, each quote character embedded in the data is\n" 1561 " written as two quotes\n"); 1562 1563 PyDoc_STRVAR(csv_reader_doc, 1564 " csv_reader = reader(iterable [, dialect='excel']\n" 1565 " [optional keyword args])\n" 1566 " for row in csv_reader:\n" 1567 " process(row)\n" 1568 "\n" 1569 "The \"iterable\" argument can be any object that returns a line\n" 1570 "of input for each iteration, such as a file object or a list. The\n" 1571 "optional \"dialect\" parameter is discussed below. The function\n" 1572 "also accepts optional keyword arguments which override settings\n" 1573 "provided by the dialect.\n" 1574 "\n" 1575 "The returned object is an iterator. Each iteration returns a row\n" 1576 "of the CSV file (which can span multiple input lines).\n"); 1577 1578 PyDoc_STRVAR(csv_writer_doc, 1579 " csv_writer = csv.writer(fileobj [, dialect='excel']\n" 1580 " [optional keyword args])\n" 1581 " for row in sequence:\n" 1582 " csv_writer.writerow(row)\n" 1583 "\n" 1584 " [or]\n" 1585 "\n" 1586 " csv_writer = csv.writer(fileobj [, dialect='excel']\n" 1587 " [optional keyword args])\n" 1588 " csv_writer.writerows(rows)\n" 1589 "\n" 1590 "The \"fileobj\" argument can be any object that supports the file API.\n"); 1591 1592 PyDoc_STRVAR(csv_list_dialects_doc, 1593 "Return a list of all know dialect names.\n" 1594 " names = csv.list_dialects()"); 1595 1596 PyDoc_STRVAR(csv_get_dialect_doc, 1597 "Return the dialect instance associated with name.\n" 1598 " dialect = csv.get_dialect(name)"); 1599 1600 PyDoc_STRVAR(csv_register_dialect_doc, 1601 "Create a mapping from a string name to a dialect class.\n" 1602 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])"); 1603 1604 PyDoc_STRVAR(csv_unregister_dialect_doc, 1605 "Delete the name/dialect mapping associated with a string name.\n" 1606 " csv.unregister_dialect(name)"); 1607 1608 PyDoc_STRVAR(csv_field_size_limit_doc, 1609 "Sets an upper limit on parsed fields.\n" 1610 " csv.field_size_limit([limit])\n" 1611 "\n" 1612 "Returns old limit. If limit is not given, no new limit is set and\n" 1613 "the old limit is returned"); 1614 1615 static struct PyMethodDef csv_methods[] = { 1616 { "reader", (PyCFunction)csv_reader, 1617 METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, 1618 { "writer", (PyCFunction)csv_writer, 1619 METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, 1620 { "list_dialects", (PyCFunction)csv_list_dialects, 1621 METH_NOARGS, csv_list_dialects_doc}, 1622 { "register_dialect", (PyCFunction)csv_register_dialect, 1623 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, 1624 { "unregister_dialect", (PyCFunction)csv_unregister_dialect, 1625 METH_O, csv_unregister_dialect_doc}, 1626 { "get_dialect", (PyCFunction)csv_get_dialect, 1627 METH_O, csv_get_dialect_doc}, 1628 { "field_size_limit", (PyCFunction)csv_field_size_limit, 1629 METH_VARARGS, csv_field_size_limit_doc}, 1630 { NULL, NULL } 1631 }; 1632 1633 static struct PyModuleDef _csvmodule = { 1634 PyModuleDef_HEAD_INIT, 1635 "_csv", 1636 csv_module_doc, 1637 sizeof(_csvstate), 1638 csv_methods, 1639 NULL, 1640 _csv_traverse, 1641 _csv_clear, 1642 _csv_free 1643 }; 1644 1645 PyMODINIT_FUNC 1646 PyInit__csv(void) 1647 { 1648 PyObject *module; 1649 const StyleDesc *style; 1650 1651 if (PyType_Ready(&Dialect_Type) < 0) 1652 return NULL; 1653 1654 if (PyType_Ready(&Reader_Type) < 0) 1655 return NULL; 1656 1657 if (PyType_Ready(&Writer_Type) < 0) 1658 return NULL; 1659 1660 /* Create the module and add the functions */ 1661 module = PyModule_Create(&_csvmodule); 1662 if (module == NULL) 1663 return NULL; 1664 1665 /* Add version to the module. */ 1666 if (PyModule_AddStringConstant(module, "__version__", 1667 MODULE_VERSION) == -1) 1668 return NULL; 1669 1670 /* Set the field limit */ 1671 _csvstate(module)->field_limit = 128 * 1024; 1672 /* Do I still need to add this var to the Module Dict? */ 1673 1674 /* Add _dialects dictionary */ 1675 _csvstate(module)->dialects = PyDict_New(); 1676 if (_csvstate(module)->dialects == NULL) 1677 return NULL; 1678 Py_INCREF(_csvstate(module)->dialects); 1679 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects)) 1680 return NULL; 1681 1682 /* Add quote styles into dictionary */ 1683 for (style = quote_styles; style->name; style++) { 1684 if (PyModule_AddIntConstant(module, style->name, 1685 style->style) == -1) 1686 return NULL; 1687 } 1688 1689 /* Add the Dialect type */ 1690 Py_INCREF(&Dialect_Type); 1691 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) 1692 return NULL; 1693 1694 /* Add the CSV exception object to the module. */ 1695 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); 1696 if (_csvstate(module)->error_obj == NULL) 1697 return NULL; 1698 Py_INCREF(_csvstate(module)->error_obj); 1699 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj); 1700 return module; 1701 } 1702