/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but should import the
csv.py module instead.

*/
     10 
/* Version string of the _csv module.
   NOTE(review): presumably exported to Python during module initialisation,
   which is outside this section - confirm. */
#define MODULE_VERSION "1.0"
     12 
     13 #include "Python.h"
     14 #include "structmember.h"
     15 
     16 
/* Per-module state; lives in the memory block returned by
   PyModule_GetState() for the module object. */
typedef struct {
    PyObject *error_obj;   /* exception type raised for CSV errors */
    PyObject *dialects;   /* dict used as the dialect registry (name -> dialect) */
    long field_limit;   /* max number of characters allowed in one parsed field */
} _csvstate;

/* Return the _csvstate block of module object `o`.  The macro shares its
   name with the typedef; it only expands when followed by '('. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
     24 
     25 static int
     26 _csv_clear(PyObject *m)
     27 {
     28     Py_CLEAR(_csvstate(m)->error_obj);
     29     Py_CLEAR(_csvstate(m)->dialects);
     30     return 0;
     31 }
     32 
     33 static int
     34 _csv_traverse(PyObject *m, visitproc visit, void *arg)
     35 {
     36     Py_VISIT(_csvstate(m)->error_obj);
     37     Py_VISIT(_csvstate(m)->dialects);
     38     return 0;
     39 }
     40 
     41 static void
     42 _csv_free(void *m)
     43 {
     44    _csv_clear((PyObject *)m);
     45 }
     46 
/* Forward declaration of the module definition; the definition itself is
   outside this section of the file. */
static struct PyModuleDef _csvmodule;

/* State of the module instance that PyState_FindModule() returns for
   _csvmodule.  The lookup is repeated every time the macro is expanded.
   NOTE(review): the result of PyState_FindModule() is not checked for NULL
   here - confirm the module is always registered before this is used. */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
     50 
     51 typedef enum {
     52     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
     53     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
     54     EAT_CRNL,AFTER_ESCAPED_CRNL
     55 } ParserState;
     56 
     57 typedef enum {
     58     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
     59 } QuoteStyle;
     60 
     61 typedef struct {
     62     QuoteStyle style;
     63     const char *name;
     64 } StyleDesc;
     65 
     66 static const StyleDesc quote_styles[] = {
     67     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
     68     { QUOTE_ALL,        "QUOTE_ALL" },
     69     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
     70     { QUOTE_NONE,       "QUOTE_NONE" },
     71     { 0 }
     72 };
     73 
/* Instance layout of the Dialect type: the dialect options, already
   converted to C values by dialect_new().  For the three character options
   the value 0 means "not set" (the getters report it as None). */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    Py_UCS4 delimiter;       /* field separator */
    Py_UCS4 quotechar;       /* quote character */
    Py_UCS4 escapechar;      /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator; /* string to write between records (owned reference) */
    int quoting;                /* style of quoting to write; one of the QuoteStyle values */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; the type object is defined after dialect_new(). */
static PyTypeObject Dialect_Type;
     89 
/* Instance layout of the reader type: the input iterator, the dialect in
   use and the state of the parse of the current record. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    Py_UCS4 *field;             /* temporary buffer holding the field being parsed */
    Py_ssize_t field_size;      /* size of allocated buffer, in Py_UCS4 units */
    Py_ssize_t field_len;       /* length of current field, in Py_UCS4 units */
    int numeric_field;          /* treat field as numeric (converted with PyNumber_Float) */
    unsigned long line_num;     /* Source-file line number: lines fetched from input_iter so far */
} ReaderObj;

/* Forward declaration; the type object is defined further down. */
static PyTypeObject Reader_Type;

/* True only when `v` is exactly a Reader_Type instance (subtypes do not match). */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
    109 
/* Instance layout of the writer type.  The code that uses these fields is
   outside this section, so the notes below restate the original field
   comments rather than observed behaviour. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

/* Forward declaration; the type object is defined outside this section. */
static PyTypeObject Writer_Type;
    124 
    125 /*
    126  * DIALECT class
    127  */
    128 
    129 static PyObject *
    130 get_dialect_from_registry(PyObject * name_obj)
    131 {
    132     PyObject *dialect_obj;
    133 
    134     dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
    135     if (dialect_obj == NULL) {
    136         if (!PyErr_Occurred())
    137             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
    138     }
    139     else
    140         Py_INCREF(dialect_obj);
    141     return dialect_obj;
    142 }
    143 
    144 static PyObject *
    145 get_string(PyObject *str)
    146 {
    147     Py_XINCREF(str);
    148     return str;
    149 }
    150 
    151 static PyObject *
    152 get_nullchar_as_None(Py_UCS4 c)
    153 {
    154     if (c == '\0') {
    155         Py_INCREF(Py_None);
    156         return Py_None;
    157     }
    158     else
    159         return PyUnicode_FromOrdinal(c);
    160 }
    161 
    162 static PyObject *
    163 Dialect_get_lineterminator(DialectObj *self)
    164 {
    165     return get_string(self->lineterminator);
    166 }
    167 
    168 static PyObject *
    169 Dialect_get_delimiter(DialectObj *self)
    170 {
    171     return get_nullchar_as_None(self->delimiter);
    172 }
    173 
    174 static PyObject *
    175 Dialect_get_escapechar(DialectObj *self)
    176 {
    177     return get_nullchar_as_None(self->escapechar);
    178 }
    179 
    180 static PyObject *
    181 Dialect_get_quotechar(DialectObj *self)
    182 {
    183     return get_nullchar_as_None(self->quotechar);
    184 }
    185 
    186 static PyObject *
    187 Dialect_get_quoting(DialectObj *self)
    188 {
    189     return PyLong_FromLong(self->quoting);
    190 }
    191 
    192 static int
    193 _set_bool(const char *name, int *target, PyObject *src, int dflt)
    194 {
    195     if (src == NULL)
    196         *target = dflt;
    197     else {
    198         int b = PyObject_IsTrue(src);
    199         if (b < 0)
    200             return -1;
    201         *target = b;
    202     }
    203     return 0;
    204 }
    205 
    206 static int
    207 _set_int(const char *name, int *target, PyObject *src, int dflt)
    208 {
    209     if (src == NULL)
    210         *target = dflt;
    211     else {
    212         long value;
    213         if (!PyLong_CheckExact(src)) {
    214             PyErr_Format(PyExc_TypeError,
    215                          "\"%s\" must be an integer", name);
    216             return -1;
    217         }
    218         value = PyLong_AsLong(src);
    219         if (value == -1 && PyErr_Occurred())
    220             return -1;
    221 #if SIZEOF_LONG > SIZEOF_INT
    222         if (value > INT_MAX || value < INT_MIN) {
    223             PyErr_Format(PyExc_ValueError,
    224                          "integer out of range for \"%s\"", name);
    225             return -1;
    226         }
    227 #endif
    228         *target = (int)value;
    229     }
    230     return 0;
    231 }
    232 
    233 static int
    234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
    235 {
    236     if (src == NULL)
    237         *target = dflt;
    238     else {
    239         *target = '\0';
    240         if (src != Py_None) {
    241             Py_ssize_t len;
    242             if (!PyUnicode_Check(src)) {
    243                 PyErr_Format(PyExc_TypeError,
    244                     "\"%s\" must be string, not %.200s", name,
    245                     src->ob_type->tp_name);
    246                 return -1;
    247             }
    248             len = PyUnicode_GetLength(src);
    249             if (len > 1) {
    250                 PyErr_Format(PyExc_TypeError,
    251                     "\"%s\" must be a 1-character string",
    252                     name);
    253                 return -1;
    254             }
    255             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
    256             if (len > 0)
    257                 *target = PyUnicode_READ_CHAR(src, 0);
    258         }
    259     }
    260     return 0;
    261 }
    262 
    263 static int
    264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
    265 {
    266     if (src == NULL)
    267         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
    268     else {
    269         if (src == Py_None)
    270             *target = NULL;
    271         else if (!PyUnicode_Check(src)) {
    272             PyErr_Format(PyExc_TypeError,
    273                          "\"%s\" must be a string", name);
    274             return -1;
    275         }
    276         else {
    277             if (PyUnicode_READY(src) == -1)
    278                 return -1;
    279             Py_INCREF(src);
    280             Py_XSETREF(*target, src);
    281         }
    282     }
    283     return 0;
    284 }
    285 
    286 static int
    287 dialect_check_quoting(int quoting)
    288 {
    289     const StyleDesc *qs;
    290 
    291     for (qs = quote_styles; qs->name; qs++) {
    292         if ((int)qs->style == quoting)
    293             return 0;
    294     }
    295     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
    296     return -1;
    297 }
    298 
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only attributes served straight from the int fields of DialectObj.
   The table ends at the entry whose name is NULL. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
    307 
/* Attributes that need conversion from their C representation.  Only a
   getter is supplied for each entry (the setter slot is left zero).  The
   table ends at the entry whose name is NULL. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
    316 
    317 static void
    318 Dialect_dealloc(DialectObj *self)
    319 {
    320     Py_XDECREF(self->lineterminator);
    321     Py_TYPE(self)->tp_free((PyObject *)self);
    322 }
    323 
/* Keyword names accepted by dialect_new(), NULL-terminated.  The order must
   match the order of the output pointers passed to
   PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
    336 
    337 static PyObject *
    338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
    339 {
    340     DialectObj *self;
    341     PyObject *ret = NULL;
    342     PyObject *dialect = NULL;
    343     PyObject *delimiter = NULL;
    344     PyObject *doublequote = NULL;
    345     PyObject *escapechar = NULL;
    346     PyObject *lineterminator = NULL;
    347     PyObject *quotechar = NULL;
    348     PyObject *quoting = NULL;
    349     PyObject *skipinitialspace = NULL;
    350     PyObject *strict = NULL;
    351 
    352     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
    353                                      "|OOOOOOOOO", dialect_kws,
    354                                      &dialect,
    355                                      &delimiter,
    356                                      &doublequote,
    357                                      &escapechar,
    358                                      &lineterminator,
    359                                      &quotechar,
    360                                      &quoting,
    361                                      &skipinitialspace,
    362                                      &strict))
    363         return NULL;
    364 
    365     if (dialect != NULL) {
    366         if (PyUnicode_Check(dialect)) {
    367             dialect = get_dialect_from_registry(dialect);
    368             if (dialect == NULL)
    369                 return NULL;
    370         }
    371         else
    372             Py_INCREF(dialect);
    373         /* Can we reuse this instance? */
    374         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
    375             delimiter == 0 &&
    376             doublequote == 0 &&
    377             escapechar == 0 &&
    378             lineterminator == 0 &&
    379             quotechar == 0 &&
    380             quoting == 0 &&
    381             skipinitialspace == 0 &&
    382             strict == 0)
    383             return dialect;
    384     }
    385 
    386     self = (DialectObj *)type->tp_alloc(type, 0);
    387     if (self == NULL) {
    388         Py_XDECREF(dialect);
    389         return NULL;
    390     }
    391     self->lineterminator = NULL;
    392 
    393     Py_XINCREF(delimiter);
    394     Py_XINCREF(doublequote);
    395     Py_XINCREF(escapechar);
    396     Py_XINCREF(lineterminator);
    397     Py_XINCREF(quotechar);
    398     Py_XINCREF(quoting);
    399     Py_XINCREF(skipinitialspace);
    400     Py_XINCREF(strict);
    401     if (dialect != NULL) {
    402 #define DIALECT_GETATTR(v, n) \
    403         if (v == NULL) \
    404             v = PyObject_GetAttrString(dialect, n)
    405         DIALECT_GETATTR(delimiter, "delimiter");
    406         DIALECT_GETATTR(doublequote, "doublequote");
    407         DIALECT_GETATTR(escapechar, "escapechar");
    408         DIALECT_GETATTR(lineterminator, "lineterminator");
    409         DIALECT_GETATTR(quotechar, "quotechar");
    410         DIALECT_GETATTR(quoting, "quoting");
    411         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
    412         DIALECT_GETATTR(strict, "strict");
    413         PyErr_Clear();
    414     }
    415 
    416     /* check types and convert to C values */
    417 #define DIASET(meth, name, target, src, dflt) \
    418     if (meth(name, target, src, dflt)) \
    419         goto err
    420     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    421     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    422     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    423     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    424     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    425     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    426     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    427     DIASET(_set_bool, "strict", &self->strict, strict, 0);
    428 
    429     /* validate options */
    430     if (dialect_check_quoting(self->quoting))
    431         goto err;
    432     if (self->delimiter == 0) {
    433         PyErr_SetString(PyExc_TypeError,
    434                         "\"delimiter\" must be a 1-character string");
    435         goto err;
    436     }
    437     if (quotechar == Py_None && quoting == NULL)
    438         self->quoting = QUOTE_NONE;
    439     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
    440         PyErr_SetString(PyExc_TypeError,
    441                         "quotechar must be set if quoting enabled");
    442         goto err;
    443     }
    444     if (self->lineterminator == 0) {
    445         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
    446         goto err;
    447     }
    448 
    449     ret = (PyObject *)self;
    450     Py_INCREF(self);
    451 err:
    452     Py_XDECREF(self);
    453     Py_XDECREF(dialect);
    454     Py_XDECREF(delimiter);
    455     Py_XDECREF(doublequote);
    456     Py_XDECREF(escapechar);
    457     Py_XDECREF(lineterminator);
    458     Py_XDECREF(quotechar);
    459     Py_XDECREF(quoting);
    460     Py_XDECREF(skipinitialspace);
    461     Py_XDECREF(strict);
    462     return ret;
    463 }
    464 
    465 
/* Docstring of the Dialect type (installed as tp_doc below). */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/* Type object for "_csv.Dialect".  Instances are created by dialect_new()
   and released by Dialect_dealloc(); attributes come from
   Dialect_memberlist and Dialect_getsetlist.  Py_TPFLAGS_BASETYPE allows
   the type to be subclassed.  No traverse/clear slots are filled in. */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    0,                                      /* tp_reserved */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                                /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                          /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    dialect_new,                                /* tp_new */
    0,                                          /* tp_free */
};
    513 
    514 /*
    515  * Return an instance of the dialect type, given a Python instance or kwarg
    516  * description of the dialect
    517  */
    518 static PyObject *
    519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
    520 {
    521     PyObject *type = (PyObject *)&Dialect_Type;
    522     if (dialect_inst) {
    523         return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
    524     }
    525     else {
    526         return _PyObject_FastCallDict(type, NULL, 0, kwargs);
    527     }
    528 }
    529 
    530 /*
    531  * READER
    532  */
    533 static int
    534 parse_save_field(ReaderObj *self)
    535 {
    536     PyObject *field;
    537 
    538     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
    539                                       (void *) self->field, self->field_len);
    540     if (field == NULL)
    541         return -1;
    542     self->field_len = 0;
    543     if (self->numeric_field) {
    544         PyObject *tmp;
    545 
    546         self->numeric_field = 0;
    547         tmp = PyNumber_Float(field);
    548         Py_DECREF(field);
    549         if (tmp == NULL)
    550             return -1;
    551         field = tmp;
    552     }
    553     if (PyList_Append(self->fields, field) < 0) {
    554         Py_DECREF(field);
    555         return -1;
    556     }
    557     Py_DECREF(field);
    558     return 0;
    559 }
    560 
    561 static int
    562 parse_grow_buff(ReaderObj *self)
    563 {
    564     if (self->field_size == 0) {
    565         self->field_size = 4096;
    566         if (self->field != NULL)
    567             PyMem_Free(self->field);
    568         self->field = PyMem_New(Py_UCS4, self->field_size);
    569     }
    570     else {
    571         Py_UCS4 *field = self->field;
    572         if (self->field_size > PY_SSIZE_T_MAX / 2) {
    573             PyErr_NoMemory();
    574             return 0;
    575         }
    576         self->field_size *= 2;
    577         self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
    578     }
    579     if (self->field == NULL) {
    580         PyErr_NoMemory();
    581         return 0;
    582     }
    583     return 1;
    584 }
    585 
    586 static int
    587 parse_add_char(ReaderObj *self, Py_UCS4 c)
    588 {
    589     if (self->field_len >= _csvstate_global->field_limit) {
    590         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
    591                      _csvstate_global->field_limit);
    592         return -1;
    593     }
    594     if (self->field_len == self->field_size && !parse_grow_buff(self))
    595         return -1;
    596     self->field[self->field_len++] = c;
    597     return 0;
    598 }
    599 
    600 static int
    601 parse_process_char(ReaderObj *self, Py_UCS4 c)
    602 {
    603     DialectObj *dialect = self->dialect;
    604 
    605     switch (self->state) {
    606     case START_RECORD:
    607         /* start of record */
    608         if (c == '\0')
    609             /* empty line - return [] */
    610             break;
    611         else if (c == '\n' || c == '\r') {
    612             self->state = EAT_CRNL;
    613             break;
    614         }
    615         /* normal character - handle as START_FIELD */
    616         self->state = START_FIELD;
    617         /* fallthru */
    618     case START_FIELD:
    619         /* expecting field */
    620         if (c == '\n' || c == '\r' || c == '\0') {
    621             /* save empty field - return [fields] */
    622             if (parse_save_field(self) < 0)
    623                 return -1;
    624             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    625         }
    626         else if (c == dialect->quotechar &&
    627                  dialect->quoting != QUOTE_NONE) {
    628             /* start quoted field */
    629             self->state = IN_QUOTED_FIELD;
    630         }
    631         else if (c == dialect->escapechar) {
    632             /* possible escaped character */
    633             self->state = ESCAPED_CHAR;
    634         }
    635         else if (c == ' ' && dialect->skipinitialspace)
    636             /* ignore space at start of field */
    637             ;
    638         else if (c == dialect->delimiter) {
    639             /* save empty field */
    640             if (parse_save_field(self) < 0)
    641                 return -1;
    642         }
    643         else {
    644             /* begin new unquoted field */
    645             if (dialect->quoting == QUOTE_NONNUMERIC)
    646                 self->numeric_field = 1;
    647             if (parse_add_char(self, c) < 0)
    648                 return -1;
    649             self->state = IN_FIELD;
    650         }
    651         break;
    652 
    653     case ESCAPED_CHAR:
    654         if (c == '\n' || c=='\r') {
    655             if (parse_add_char(self, c) < 0)
    656                 return -1;
    657             self->state = AFTER_ESCAPED_CRNL;
    658             break;
    659         }
    660         if (c == '\0')
    661             c = '\n';
    662         if (parse_add_char(self, c) < 0)
    663             return -1;
    664         self->state = IN_FIELD;
    665         break;
    666 
    667     case AFTER_ESCAPED_CRNL:
    668         if (c == '\0')
    669             break;
    670         /*fallthru*/
    671 
    672     case IN_FIELD:
    673         /* in unquoted field */
    674         if (c == '\n' || c == '\r' || c == '\0') {
    675             /* end of line - return [fields] */
    676             if (parse_save_field(self) < 0)
    677                 return -1;
    678             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    679         }
    680         else if (c == dialect->escapechar) {
    681             /* possible escaped character */
    682             self->state = ESCAPED_CHAR;
    683         }
    684         else if (c == dialect->delimiter) {
    685             /* save field - wait for new field */
    686             if (parse_save_field(self) < 0)
    687                 return -1;
    688             self->state = START_FIELD;
    689         }
    690         else {
    691             /* normal character - save in field */
    692             if (parse_add_char(self, c) < 0)
    693                 return -1;
    694         }
    695         break;
    696 
    697     case IN_QUOTED_FIELD:
    698         /* in quoted field */
    699         if (c == '\0')
    700             ;
    701         else if (c == dialect->escapechar) {
    702             /* Possible escape character */
    703             self->state = ESCAPE_IN_QUOTED_FIELD;
    704         }
    705         else if (c == dialect->quotechar &&
    706                  dialect->quoting != QUOTE_NONE) {
    707             if (dialect->doublequote) {
    708                 /* doublequote; " represented by "" */
    709                 self->state = QUOTE_IN_QUOTED_FIELD;
    710             }
    711             else {
    712                 /* end of quote part of field */
    713                 self->state = IN_FIELD;
    714             }
    715         }
    716         else {
    717             /* normal character - save in field */
    718             if (parse_add_char(self, c) < 0)
    719                 return -1;
    720         }
    721         break;
    722 
    723     case ESCAPE_IN_QUOTED_FIELD:
    724         if (c == '\0')
    725             c = '\n';
    726         if (parse_add_char(self, c) < 0)
    727             return -1;
    728         self->state = IN_QUOTED_FIELD;
    729         break;
    730 
    731     case QUOTE_IN_QUOTED_FIELD:
    732         /* doublequote - seen a quote in a quoted field */
    733         if (dialect->quoting != QUOTE_NONE &&
    734             c == dialect->quotechar) {
    735             /* save "" as " */
    736             if (parse_add_char(self, c) < 0)
    737                 return -1;
    738             self->state = IN_QUOTED_FIELD;
    739         }
    740         else if (c == dialect->delimiter) {
    741             /* save field - wait for new field */
    742             if (parse_save_field(self) < 0)
    743                 return -1;
    744             self->state = START_FIELD;
    745         }
    746         else if (c == '\n' || c == '\r' || c == '\0') {
    747             /* end of line - return [fields] */
    748             if (parse_save_field(self) < 0)
    749                 return -1;
    750             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    751         }
    752         else if (!dialect->strict) {
    753             if (parse_add_char(self, c) < 0)
    754                 return -1;
    755             self->state = IN_FIELD;
    756         }
    757         else {
    758             /* illegal */
    759             PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
    760                             dialect->delimiter,
    761                             dialect->quotechar);
    762             return -1;
    763         }
    764         break;
    765 
    766     case EAT_CRNL:
    767         if (c == '\n' || c == '\r')
    768             ;
    769         else if (c == '\0')
    770             self->state = START_RECORD;
    771         else {
    772             PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
    773             return -1;
    774         }
    775         break;
    776 
    777     }
    778     return 0;
    779 }
    780 
    781 static int
    782 parse_reset(ReaderObj *self)
    783 {
    784     Py_XSETREF(self->fields, PyList_New(0));
    785     if (self->fields == NULL)
    786         return -1;
    787     self->field_len = 0;
    788     self->state = START_RECORD;
    789     self->numeric_field = 0;
    790     return 0;
    791 }
    792 
    793 static PyObject *
    794 Reader_iternext(ReaderObj *self)
    795 {
    796     PyObject *fields = NULL;
    797     Py_UCS4 c;
    798     Py_ssize_t pos, linelen;
    799     unsigned int kind;
    800     void *data;
    801     PyObject *lineobj;
    802 
    803     if (parse_reset(self) < 0)
    804         return NULL;
    805     do {
    806         lineobj = PyIter_Next(self->input_iter);
    807         if (lineobj == NULL) {
    808             /* End of input OR exception */
    809             if (!PyErr_Occurred() && (self->field_len != 0 ||
    810                                       self->state == IN_QUOTED_FIELD)) {
    811                 if (self->dialect->strict)
    812                     PyErr_SetString(_csvstate_global->error_obj,
    813                                     "unexpected end of data");
    814                 else if (parse_save_field(self) >= 0)
    815                     break;
    816             }
    817             return NULL;
    818         }
    819         if (!PyUnicode_Check(lineobj)) {
    820             PyErr_Format(_csvstate_global->error_obj,
    821                          "iterator should return strings, "
    822                          "not %.200s "
    823                          "(did you open the file in text mode?)",
    824                          lineobj->ob_type->tp_name
    825                 );
    826             Py_DECREF(lineobj);
    827             return NULL;
    828         }
    829         if (PyUnicode_READY(lineobj) == -1) {
    830             Py_DECREF(lineobj);
    831             return NULL;
    832         }
    833         ++self->line_num;
    834         kind = PyUnicode_KIND(lineobj);
    835         data = PyUnicode_DATA(lineobj);
    836         pos = 0;
    837         linelen = PyUnicode_GET_LENGTH(lineobj);
    838         while (linelen--) {
    839             c = PyUnicode_READ(kind, data, pos);
    840             if (c == '\0') {
    841                 Py_DECREF(lineobj);
    842                 PyErr_Format(_csvstate_global->error_obj,
    843                              "line contains NULL byte");
    844                 goto err;
    845             }
    846             if (parse_process_char(self, c) < 0) {
    847                 Py_DECREF(lineobj);
    848                 goto err;
    849             }
    850             pos++;
    851         }
    852         Py_DECREF(lineobj);
    853         if (parse_process_char(self, 0) < 0)
    854             goto err;
    855     } while (self->state != START_RECORD);
    856 
    857     fields = self->fields;
    858     self->fields = NULL;
    859 err:
    860     return fields;
    861 }
    862 
    863 static void
    864 Reader_dealloc(ReaderObj *self)
    865 {
    866     PyObject_GC_UnTrack(self);
    867     Py_XDECREF(self->dialect);
    868     Py_XDECREF(self->input_iter);
    869     Py_XDECREF(self->fields);
    870     if (self->field != NULL)
    871         PyMem_Free(self->field);
    872     PyObject_GC_Del(self);
    873 }
    874 
/* tp_traverse for _csv.reader objects: hand each owned object reference
 * (dialect, input_iter, fields) to the visitor via Py_VISIT.  Returns 0
 * when the end of the function is reached.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
    883 
/* tp_clear for _csv.reader objects: drop the references to dialect,
 * input_iter and fields with Py_CLEAR so the members are reset to NULL.
 * Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
    892 
/* Docstring placed in the tp_doc slot of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
    899 
/* Method table for Reader_Type (tp_methods).  It contains only the
 * terminating sentinel entry, i.e. no methods are defined here. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member x inside ReaderObj, for the member table below. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Member table for Reader_Type (tp_members): exposes the `dialect` object
 * and the `line_num` counter as read-only attributes. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
    910 
    911 
    912 static PyTypeObject Reader_Type = {
    913     PyVarObject_HEAD_INIT(NULL, 0)
    914     "_csv.reader",                          /*tp_name*/
    915     sizeof(ReaderObj),                      /*tp_basicsize*/
    916     0,                                      /*tp_itemsize*/
    917     /* methods */
    918     (destructor)Reader_dealloc,             /*tp_dealloc*/
    919     (printfunc)0,                           /*tp_print*/
    920     (getattrfunc)0,                         /*tp_getattr*/
    921     (setattrfunc)0,                         /*tp_setattr*/
    922     0,                                     /*tp_reserved*/
    923     (reprfunc)0,                            /*tp_repr*/
    924     0,                                      /*tp_as_number*/
    925     0,                                      /*tp_as_sequence*/
    926     0,                                      /*tp_as_mapping*/
    927     (hashfunc)0,                            /*tp_hash*/
    928     (ternaryfunc)0,                         /*tp_call*/
    929     (reprfunc)0,                                /*tp_str*/
    930     0,                                      /*tp_getattro*/
    931     0,                                      /*tp_setattro*/
    932     0,                                      /*tp_as_buffer*/
    933     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    934         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
    935     Reader_Type_doc,                        /*tp_doc*/
    936     (traverseproc)Reader_traverse,          /*tp_traverse*/
    937     (inquiry)Reader_clear,                  /*tp_clear*/
    938     0,                                      /*tp_richcompare*/
    939     0,                                      /*tp_weaklistoffset*/
    940     PyObject_SelfIter,                          /*tp_iter*/
    941     (getiterfunc)Reader_iternext,           /*tp_iternext*/
    942     Reader_methods,                         /*tp_methods*/
    943     Reader_memberlist,                      /*tp_members*/
    944     0,                                      /*tp_getset*/
    945 
    946 };
    947 
    948 static PyObject *
    949 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
    950 {
    951     PyObject * iterator, * dialect = NULL;
    952     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
    953 
    954     if (!self)
    955         return NULL;
    956 
    957     self->dialect = NULL;
    958     self->fields = NULL;
    959     self->input_iter = NULL;
    960     self->field = NULL;
    961     self->field_size = 0;
    962     self->line_num = 0;
    963 
    964     if (parse_reset(self) < 0) {
    965         Py_DECREF(self);
    966         return NULL;
    967     }
    968 
    969     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
    970         Py_DECREF(self);
    971         return NULL;
    972     }
    973     self->input_iter = PyObject_GetIter(iterator);
    974     if (self->input_iter == NULL) {
    975         PyErr_SetString(PyExc_TypeError,
    976                         "argument 1 must be an iterator");
    977         Py_DECREF(self);
    978         return NULL;
    979     }
    980     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
    981     if (self->dialect == NULL) {
    982         Py_DECREF(self);
    983         return NULL;
    984     }
    985 
    986     PyObject_GC_Track(self);
    987     return (PyObject *)self;
    988 }
    989 
    990 /*
    991  * WRITER
    992  */
    993 /* ---------------------------------------------------------------- */
    994 static void
    995 join_reset(WriterObj *self)
    996 {
    997     self->rec_len = 0;
    998     self->num_fields = 0;
    999 }
   1000 
/* Granularity used when sizing the writer's record buffer: the buffer
 * size is always rounded up to a multiple of this many elements. */
#define MEM_INCR 32768
   1002 
   1003 /* Calculate new record length or append field to record.  Return new
   1004  * record length.
   1005  */
   1006 static Py_ssize_t
   1007 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
   1008                  Py_ssize_t field_len, int *quoted,
   1009                  int copy_phase)
   1010 {
   1011     DialectObj *dialect = self->dialect;
   1012     int i;
   1013     Py_ssize_t rec_len;
   1014 
   1015 #define INCLEN \
   1016     do {\
   1017         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
   1018             goto overflow; \
   1019         } \
   1020         rec_len++; \
   1021     } while(0)
   1022 
   1023 #define ADDCH(c)                                \
   1024     do {\
   1025         if (copy_phase) \
   1026             self->rec[rec_len] = c;\
   1027         INCLEN;\
   1028     } while(0)
   1029 
   1030     rec_len = self->rec_len;
   1031 
   1032     /* If this is not the first field we need a field separator */
   1033     if (self->num_fields > 0)
   1034         ADDCH(dialect->delimiter);
   1035 
   1036     /* Handle preceding quote */
   1037     if (copy_phase && *quoted)
   1038         ADDCH(dialect->quotechar);
   1039 
   1040     /* Copy/count field data */
   1041     /* If field is null just pass over */
   1042     for (i = 0; field_data && (i < field_len); i++) {
   1043         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
   1044         int want_escape = 0;
   1045 
   1046         if (c == dialect->delimiter ||
   1047             c == dialect->escapechar ||
   1048             c == dialect->quotechar  ||
   1049             PyUnicode_FindChar(
   1050                 dialect->lineterminator, c, 0,
   1051                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
   1052             if (dialect->quoting == QUOTE_NONE)
   1053                 want_escape = 1;
   1054             else {
   1055                 if (c == dialect->quotechar) {
   1056                     if (dialect->doublequote)
   1057                         ADDCH(dialect->quotechar);
   1058                     else
   1059                         want_escape = 1;
   1060                 }
   1061                 if (!want_escape)
   1062                     *quoted = 1;
   1063             }
   1064             if (want_escape) {
   1065                 if (!dialect->escapechar) {
   1066                     PyErr_Format(_csvstate_global->error_obj,
   1067                                  "need to escape, but no escapechar set");
   1068                     return -1;
   1069                 }
   1070                 ADDCH(dialect->escapechar);
   1071             }
   1072         }
   1073         /* Copy field character into record buffer.
   1074          */
   1075         ADDCH(c);
   1076     }
   1077 
   1078     if (*quoted) {
   1079         if (copy_phase)
   1080             ADDCH(dialect->quotechar);
   1081         else {
   1082             INCLEN; /* starting quote */
   1083             INCLEN; /* ending quote */
   1084         }
   1085     }
   1086     return rec_len;
   1087 
   1088   overflow:
   1089     PyErr_NoMemory();
   1090     return -1;
   1091 #undef ADDCH
   1092 #undef INCLEN
   1093 }
   1094 
   1095 static int
   1096 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
   1097 {
   1098 
   1099     if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
   1100         PyErr_NoMemory();
   1101         return 0;
   1102     }
   1103 
   1104     if (rec_len > self->rec_size) {
   1105         if (self->rec_size == 0) {
   1106             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
   1107             if (self->rec != NULL)
   1108                 PyMem_Free(self->rec);
   1109             self->rec = PyMem_New(Py_UCS4, self->rec_size);
   1110         }
   1111         else {
   1112             Py_UCS4* old_rec = self->rec;
   1113 
   1114             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
   1115             self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
   1116             if (self->rec == NULL)
   1117                 PyMem_Free(old_rec);
   1118         }
   1119         if (self->rec == NULL) {
   1120             PyErr_NoMemory();
   1121             return 0;
   1122         }
   1123     }
   1124     return 1;
   1125 }
   1126 
   1127 static int
   1128 join_append(WriterObj *self, PyObject *field, int quoted)
   1129 {
   1130     unsigned int field_kind = -1;
   1131     void *field_data = NULL;
   1132     Py_ssize_t field_len = 0;
   1133     Py_ssize_t rec_len;
   1134 
   1135     if (field != NULL) {
   1136         if (PyUnicode_READY(field) == -1)
   1137             return 0;
   1138         field_kind = PyUnicode_KIND(field);
   1139         field_data = PyUnicode_DATA(field);
   1140         field_len = PyUnicode_GET_LENGTH(field);
   1141     }
   1142     rec_len = join_append_data(self, field_kind, field_data, field_len,
   1143                                &quoted, 0);
   1144     if (rec_len < 0)
   1145         return 0;
   1146 
   1147     /* grow record buffer if necessary */
   1148     if (!join_check_rec_size(self, rec_len))
   1149         return 0;
   1150 
   1151     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
   1152                                      &quoted, 1);
   1153     self->num_fields++;
   1154 
   1155     return 1;
   1156 }
   1157 
   1158 static int
   1159 join_append_lineterminator(WriterObj *self)
   1160 {
   1161     Py_ssize_t terminator_len, i;
   1162     unsigned int term_kind;
   1163     void *term_data;
   1164 
   1165     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
   1166     if (terminator_len == -1)
   1167         return 0;
   1168 
   1169     /* grow record buffer if necessary */
   1170     if (!join_check_rec_size(self, self->rec_len + terminator_len))
   1171         return 0;
   1172 
   1173     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
   1174     term_data = PyUnicode_DATA(self->dialect->lineterminator);
   1175     for (i = 0; i < terminator_len; i++)
   1176         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
   1177     self->rec_len += terminator_len;
   1178 
   1179     return 1;
   1180 }
   1181 
/* Docstring for the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
"elements will be converted to string.");
   1187 
   1188 static PyObject *
   1189 csv_writerow(WriterObj *self, PyObject *seq)
   1190 {
   1191     DialectObj *dialect = self->dialect;
   1192     PyObject *iter, *field, *line, *result;
   1193 
   1194     iter = PyObject_GetIter(seq);
   1195     if (iter == NULL)
   1196         return PyErr_Format(_csvstate_global->error_obj,
   1197                             "iterable expected, not %.200s",
   1198                             seq->ob_type->tp_name);
   1199 
   1200     /* Join all fields in internal buffer.
   1201      */
   1202     join_reset(self);
   1203     while ((field = PyIter_Next(iter))) {
   1204         int append_ok;
   1205         int quoted;
   1206 
   1207         switch (dialect->quoting) {
   1208         case QUOTE_NONNUMERIC:
   1209             quoted = !PyNumber_Check(field);
   1210             break;
   1211         case QUOTE_ALL:
   1212             quoted = 1;
   1213             break;
   1214         default:
   1215             quoted = 0;
   1216             break;
   1217         }
   1218 
   1219         if (PyUnicode_Check(field)) {
   1220             append_ok = join_append(self, field, quoted);
   1221             Py_DECREF(field);
   1222         }
   1223         else if (field == Py_None) {
   1224             append_ok = join_append(self, NULL, quoted);
   1225             Py_DECREF(field);
   1226         }
   1227         else {
   1228             PyObject *str;
   1229 
   1230             str = PyObject_Str(field);
   1231             Py_DECREF(field);
   1232             if (str == NULL) {
   1233                 Py_DECREF(iter);
   1234                 return NULL;
   1235             }
   1236             append_ok = join_append(self, str, quoted);
   1237             Py_DECREF(str);
   1238         }
   1239         if (!append_ok) {
   1240             Py_DECREF(iter);
   1241             return NULL;
   1242         }
   1243     }
   1244     Py_DECREF(iter);
   1245     if (PyErr_Occurred())
   1246         return NULL;
   1247 
   1248     if (self->num_fields > 0 && self->rec_size == 0) {
   1249         if (dialect->quoting == QUOTE_NONE) {
   1250             PyErr_Format(_csvstate_global->error_obj,
   1251                 "single empty field record must be quoted");
   1252             return NULL;
   1253         }
   1254         self->num_fields--;
   1255         if (!join_append(self, NULL, 1))
   1256             return NULL;
   1257     }
   1258 
   1259     /* Add line terminator.
   1260      */
   1261     if (!join_append_lineterminator(self))
   1262         return NULL;
   1263 
   1264     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
   1265                                      (void *) self->rec, self->rec_len);
   1266     if (line == NULL)
   1267         return NULL;
   1268     result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
   1269     Py_DECREF(line);
   1270     return result;
   1271 }
   1272 
/* Docstring for the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file.  Non-string\n"
"elements will be converted to string.");
   1278 
   1279 static PyObject *
   1280 csv_writerows(WriterObj *self, PyObject *seqseq)
   1281 {
   1282     PyObject *row_iter, *row_obj, *result;
   1283 
   1284     row_iter = PyObject_GetIter(seqseq);
   1285     if (row_iter == NULL) {
   1286         PyErr_SetString(PyExc_TypeError,
   1287                         "writerows() argument must be iterable");
   1288         return NULL;
   1289     }
   1290     while ((row_obj = PyIter_Next(row_iter))) {
   1291         result = csv_writerow(self, row_obj);
   1292         Py_DECREF(row_obj);
   1293         if (!result) {
   1294             Py_DECREF(row_iter);
   1295             return NULL;
   1296         }
   1297         else
   1298              Py_DECREF(result);
   1299     }
   1300     Py_DECREF(row_iter);
   1301     if (PyErr_Occurred())
   1302         return NULL;
   1303     Py_INCREF(Py_None);
   1304     return Py_None;
   1305 }
   1306 
/* Method table for Writer_Type (tp_methods).  Both methods take exactly
 * one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
   1312 
/* Byte offset of member x inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Member table for Writer_Type (tp_members): exposes the `dialect` object
 * as a read-only attribute. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
   1319 
   1320 static void
   1321 Writer_dealloc(WriterObj *self)
   1322 {
   1323     PyObject_GC_UnTrack(self);
   1324     Py_XDECREF(self->dialect);
   1325     Py_XDECREF(self->writeline);
   1326     if (self->rec != NULL)
   1327         PyMem_Free(self->rec);
   1328     PyObject_GC_Del(self);
   1329 }
   1330 
/* tp_traverse for _csv.writer objects: hand each owned object reference
 * (dialect, writeline) to the visitor via Py_VISIT.  Returns 0 when the
 * end of the function is reached.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
   1338 
/* tp_clear for _csv.writer objects: drop the references to dialect and
 * writeline with Py_CLEAR so the members are reset to NULL.  Always
 * returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
   1346 
/* Docstring placed in the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
   1353 
   1354 static PyTypeObject Writer_Type = {
   1355     PyVarObject_HEAD_INIT(NULL, 0)
   1356     "_csv.writer",                          /*tp_name*/
   1357     sizeof(WriterObj),                      /*tp_basicsize*/
   1358     0,                                      /*tp_itemsize*/
   1359     /* methods */
   1360     (destructor)Writer_dealloc,             /*tp_dealloc*/
   1361     (printfunc)0,                           /*tp_print*/
   1362     (getattrfunc)0,                         /*tp_getattr*/
   1363     (setattrfunc)0,                         /*tp_setattr*/
   1364     0,                                      /*tp_reserved*/
   1365     (reprfunc)0,                            /*tp_repr*/
   1366     0,                                      /*tp_as_number*/
   1367     0,                                      /*tp_as_sequence*/
   1368     0,                                      /*tp_as_mapping*/
   1369     (hashfunc)0,                            /*tp_hash*/
   1370     (ternaryfunc)0,                         /*tp_call*/
   1371     (reprfunc)0,                            /*tp_str*/
   1372     0,                                      /*tp_getattro*/
   1373     0,                                      /*tp_setattro*/
   1374     0,                                      /*tp_as_buffer*/
   1375     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
   1376         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
   1377     Writer_Type_doc,
   1378     (traverseproc)Writer_traverse,          /*tp_traverse*/
   1379     (inquiry)Writer_clear,                  /*tp_clear*/
   1380     0,                                      /*tp_richcompare*/
   1381     0,                                      /*tp_weaklistoffset*/
   1382     (getiterfunc)0,                         /*tp_iter*/
   1383     (getiterfunc)0,                         /*tp_iternext*/
   1384     Writer_methods,                         /*tp_methods*/
   1385     Writer_memberlist,                      /*tp_members*/
   1386     0,                                      /*tp_getset*/
   1387 };
   1388 
   1389 static PyObject *
   1390 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
   1391 {
   1392     PyObject * output_file, * dialect = NULL;
   1393     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
   1394     _Py_IDENTIFIER(write);
   1395 
   1396     if (!self)
   1397         return NULL;
   1398 
   1399     self->dialect = NULL;
   1400     self->writeline = NULL;
   1401 
   1402     self->rec = NULL;
   1403     self->rec_size = 0;
   1404     self->rec_len = 0;
   1405     self->num_fields = 0;
   1406 
   1407     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
   1408         Py_DECREF(self);
   1409         return NULL;
   1410     }
   1411     self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
   1412     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
   1413         PyErr_SetString(PyExc_TypeError,
   1414                         "argument 1 must have a \"write\" method");
   1415         Py_DECREF(self);
   1416         return NULL;
   1417     }
   1418     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
   1419     if (self->dialect == NULL) {
   1420         Py_DECREF(self);
   1421         return NULL;
   1422     }
   1423     PyObject_GC_Track(self);
   1424     return (PyObject *)self;
   1425 }
   1426 
   1427 /*
   1428  * DIALECT REGISTRY
   1429  */
   1430 static PyObject *
   1431 csv_list_dialects(PyObject *module, PyObject *args)
   1432 {
   1433     return PyDict_Keys(_csvstate_global->dialects);
   1434 }
   1435 
   1436 static PyObject *
   1437 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
   1438 {
   1439     PyObject *name_obj, *dialect_obj = NULL;
   1440     PyObject *dialect;
   1441 
   1442     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
   1443         return NULL;
   1444     if (!PyUnicode_Check(name_obj)) {
   1445         PyErr_SetString(PyExc_TypeError,
   1446                         "dialect name must be a string");
   1447         return NULL;
   1448     }
   1449     if (PyUnicode_READY(name_obj) == -1)
   1450         return NULL;
   1451     dialect = _call_dialect(dialect_obj, kwargs);
   1452     if (dialect == NULL)
   1453         return NULL;
   1454     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
   1455         Py_DECREF(dialect);
   1456         return NULL;
   1457     }
   1458     Py_DECREF(dialect);
   1459     Py_INCREF(Py_None);
   1460     return Py_None;
   1461 }
   1462 
   1463 static PyObject *
   1464 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
   1465 {
   1466     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
   1467         return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
   1468     Py_INCREF(Py_None);
   1469     return Py_None;
   1470 }
   1471 
   1472 static PyObject *
   1473 csv_get_dialect(PyObject *module, PyObject *name_obj)
   1474 {
   1475     return get_dialect_from_registry(name_obj);
   1476 }
   1477 
   1478 static PyObject *
   1479 csv_field_size_limit(PyObject *module, PyObject *args)
   1480 {
   1481     PyObject *new_limit = NULL;
   1482     long old_limit = _csvstate_global->field_limit;
   1483 
   1484     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
   1485         return NULL;
   1486     if (new_limit != NULL) {
   1487         if (!PyLong_CheckExact(new_limit)) {
   1488             PyErr_Format(PyExc_TypeError,
   1489                          "limit must be an integer");
   1490             return NULL;
   1491         }
   1492         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
   1493         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
   1494             _csvstate_global->field_limit = old_limit;
   1495             return NULL;
   1496         }
   1497     }
   1498     return PyLong_FromLong(old_limit);
   1499 }
   1500 
   1501 /*
   1502  * MODULE
   1503  */
   1504 
/* Module docstring, referenced from the _csvmodule definition.  Gives an
 * overview of the module, of dialect registration and of the individual
 * dialect settings. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
   1562 
/* Docstring for the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
   1577 
/* Docstring for the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
   1591 
   1592 PyDoc_STRVAR(csv_list_dialects_doc,
   1593 "Return a list of all know dialect names.\n"
   1594 "    names = csv.list_dialects()");
   1595 
/* Docstring for the module-level get_dialect() function (see
 * csv_methods). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
   1599 
   1600 PyDoc_STRVAR(csv_register_dialect_doc,
   1601 "Create a mapping from a string name to a dialect class.\n"
   1602 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
   1603 
/* Docstring for the module-level unregister_dialect() function (see
 * csv_methods). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
   1607 
/* Docstring for the module-level field_size_limit() function (see
 * csv_methods). */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
   1614 
/* Module-level function table, referenced from the _csvmodule
 * definition.  Each entry gives the Python-visible name, the C
 * implementation, its calling convention flags and its docstring; the
 * table ends with a NULL sentinel. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
   1632 
/* Module definition for _csv.  The per-module state is a _csvstate
 * struct; the traverse/clear/free hooks manage the object references
 * stored in it. */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",             /* module name */
    csv_module_doc,     /* module docstring */
    sizeof(_csvstate),  /* size of the per-module state */
    csv_methods,        /* module-level functions */
    NULL,               /* slot between the methods and traverse; unused */
    _csv_traverse,      /* GC traversal of the module state */
    _csv_clear,         /* GC clearing of the module state */
    _csv_free           /* called when the module is deallocated */
};
   1644 
   1645 PyMODINIT_FUNC
   1646 PyInit__csv(void)
   1647 {
   1648     PyObject *module;
   1649     const StyleDesc *style;
   1650 
   1651     if (PyType_Ready(&Dialect_Type) < 0)
   1652         return NULL;
   1653 
   1654     if (PyType_Ready(&Reader_Type) < 0)
   1655         return NULL;
   1656 
   1657     if (PyType_Ready(&Writer_Type) < 0)
   1658         return NULL;
   1659 
   1660     /* Create the module and add the functions */
   1661     module = PyModule_Create(&_csvmodule);
   1662     if (module == NULL)
   1663         return NULL;
   1664 
   1665     /* Add version to the module. */
   1666     if (PyModule_AddStringConstant(module, "__version__",
   1667                                    MODULE_VERSION) == -1)
   1668         return NULL;
   1669 
   1670     /* Set the field limit */
   1671     _csvstate(module)->field_limit = 128 * 1024;
   1672     /* Do I still need to add this var to the Module Dict? */
   1673 
   1674     /* Add _dialects dictionary */
   1675     _csvstate(module)->dialects = PyDict_New();
   1676     if (_csvstate(module)->dialects == NULL)
   1677         return NULL;
   1678     Py_INCREF(_csvstate(module)->dialects);
   1679     if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
   1680         return NULL;
   1681 
   1682     /* Add quote styles into dictionary */
   1683     for (style = quote_styles; style->name; style++) {
   1684         if (PyModule_AddIntConstant(module, style->name,
   1685                                     style->style) == -1)
   1686             return NULL;
   1687     }
   1688 
   1689     /* Add the Dialect type */
   1690     Py_INCREF(&Dialect_Type);
   1691     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
   1692         return NULL;
   1693 
   1694     /* Add the CSV exception object to the module. */
   1695     _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
   1696     if (_csvstate(module)->error_obj == NULL)
   1697         return NULL;
   1698     Py_INCREF(_csvstate(module)->error_obj);
   1699     PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
   1700     return module;
   1701 }
   1702