Home | History | Annotate | Download | only in Modules
      1 /* csv module */
      2 
      3 /*
      4 
      5 This module provides the low-level underpinnings of a CSV reading/writing
      6 module.  Users should not use this module directly, but import the csv.py
      7 module instead.
      8 
      9 **** For people modifying this code, please note that as of this writing
     10 **** (2003-03-23), it is intended that this code should work with Python
     11 **** 2.2.
     12 
     13 */
     14 
     15 #define MODULE_VERSION "1.0"
     16 
     17 #include "Python.h"
     18 #include "structmember.h"
     19 
     20 
     21 /* begin 2.2 compatibility macros */
     22 #ifndef PyDoc_STRVAR
     23 /* Define macros for inline documentation. */
     24 #define PyDoc_VAR(name) static char name[]
     25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
     26 #ifdef WITH_DOC_STRINGS
     27 #define PyDoc_STR(str) str
     28 #else
     29 #define PyDoc_STR(str) ""
     30 #endif
     31 #endif /* ifndef PyDoc_STRVAR */
     32 
     33 #ifndef PyMODINIT_FUNC
     34 #       if defined(__cplusplus)
     35 #               define PyMODINIT_FUNC extern "C" void
     36 #       else /* __cplusplus */
     37 #               define PyMODINIT_FUNC void
     38 #       endif /* __cplusplus */
     39 #endif
     40 
     41 #ifndef Py_CLEAR
     42 #define Py_CLEAR(op)                                            \
     43     do {                                                        \
     44         if (op) {                                               \
     45             PyObject *tmp = (PyObject *)(op);                   \
     46             (op) = NULL;                                        \
     47             Py_DECREF(tmp);                                     \
     48         }                                                       \
     49     } while (0)
     50 #endif
     51 #ifndef Py_VISIT
     52 #define Py_VISIT(op)                                                    \
     53     do {                                                                \
     54         if (op) {                                                       \
     55             int vret = visit((PyObject *)(op), arg);                    \
     56             if (vret)                                                   \
     57                 return vret;                                            \
     58         }                                                               \
     59     } while (0)
     60 #endif
     61 
     62 /* end 2.2 compatibility macros */
     63 
     64 #define IS_BASESTRING(o) \
     65     PyObject_TypeCheck(o, &PyBaseString_Type)
     66 
     67 static PyObject *error_obj;     /* CSV exception */
     68 static PyObject *dialects;      /* Dialect registry */
     69 static long field_limit = 128 * 1024;   /* max parsed field size */
     70 
     71 typedef enum {
     72     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
     73     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
     74     EAT_CRNL
     75 } ParserState;
     76 
     77 typedef enum {
     78     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
     79 } QuoteStyle;
     80 
     81 typedef struct {
     82     QuoteStyle style;
     83     char *name;
     84 } StyleDesc;
     85 
     86 static StyleDesc quote_styles[] = {
     87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
     88     { QUOTE_ALL,        "QUOTE_ALL" },
     89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
     90     { QUOTE_NONE,       "QUOTE_NONE" },
     91     { 0 }
     92 };
     93 
     94 typedef struct {
     95     PyObject_HEAD
     96 
     97     int doublequote;            /* is " represented by ""? */
     98     char delimiter;             /* field separator */
     99     char quotechar;             /* quote character */
    100     char escapechar;            /* escape character */
    101     int skipinitialspace;       /* ignore spaces following delimiter? */
    102     PyObject *lineterminator; /* string to write between records */
    103     int quoting;                /* style of quoting to write */
    104 
    105     int strict;                 /* raise exception on bad CSV */
    106 } DialectObj;
    107 
    108 staticforward PyTypeObject Dialect_Type;
    109 
    110 typedef struct {
    111     PyObject_HEAD
    112 
    113     PyObject *input_iter;   /* iterate over this for input lines */
    114 
    115     DialectObj *dialect;    /* parsing dialect */
    116 
    117     PyObject *fields;           /* field list for current record */
    118     ParserState state;          /* current CSV parse state */
    119     char *field;                /* build current field in here */
    120     int field_size;             /* size of allocated buffer */
    121     int field_len;              /* length of current field */
    122     int numeric_field;          /* treat field as numeric */
    123     unsigned long line_num;     /* Source-file line number */
    124 } ReaderObj;
    125 
    126 staticforward PyTypeObject Reader_Type;
    127 
    128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
    129 
    130 typedef struct {
    131     PyObject_HEAD
    132 
    133     PyObject *writeline;    /* write output lines to this file */
    134 
    135     DialectObj *dialect;    /* parsing dialect */
    136 
    137     char *rec;                  /* buffer for parser.join */
    138     int rec_size;               /* size of allocated record */
    139     int rec_len;                /* length of record */
    140     int num_fields;             /* number of fields in record */
    141 } WriterObj;
    142 
    143 staticforward PyTypeObject Writer_Type;
    144 
    145 /*
    146  * DIALECT class
    147  */
    148 
    149 static PyObject *
    150 get_dialect_from_registry(PyObject * name_obj)
    151 {
    152     PyObject *dialect_obj;
    153 
    154     dialect_obj = PyDict_GetItem(dialects, name_obj);
    155     if (dialect_obj == NULL) {
    156         if (!PyErr_Occurred())
    157             PyErr_Format(error_obj, "unknown dialect");
    158     }
    159     else
    160         Py_INCREF(dialect_obj);
    161     return dialect_obj;
    162 }
    163 
    164 static PyObject *
    165 get_string(PyObject *str)
    166 {
    167     Py_XINCREF(str);
    168     return str;
    169 }
    170 
    171 static PyObject *
    172 get_nullchar_as_None(char c)
    173 {
    174     if (c == '\0') {
    175         Py_INCREF(Py_None);
    176         return Py_None;
    177     }
    178     else
    179         return PyString_FromStringAndSize((char*)&c, 1);
    180 }
    181 
    182 static PyObject *
    183 Dialect_get_lineterminator(DialectObj *self)
    184 {
    185     return get_string(self->lineterminator);
    186 }
    187 
    188 static PyObject *
    189 Dialect_get_escapechar(DialectObj *self)
    190 {
    191     return get_nullchar_as_None(self->escapechar);
    192 }
    193 
    194 static PyObject *
    195 Dialect_get_quotechar(DialectObj *self)
    196 {
    197     return get_nullchar_as_None(self->quotechar);
    198 }
    199 
    200 static PyObject *
    201 Dialect_get_quoting(DialectObj *self)
    202 {
    203     return PyInt_FromLong(self->quoting);
    204 }
    205 
    206 static int
    207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
    208 {
    209     if (src == NULL)
    210         *target = dflt;
    211     else {
    212         int b = PyObject_IsTrue(src);
    213         if (b < 0)
    214             return -1;
    215         *target = b;
    216     }
    217     return 0;
    218 }
    219 
    220 static int
    221 _set_int(const char *name, int *target, PyObject *src, int dflt)
    222 {
    223     if (src == NULL)
    224         *target = dflt;
    225     else {
    226         if (!PyInt_Check(src)) {
    227             PyErr_Format(PyExc_TypeError,
    228                          "\"%s\" must be an integer", name);
    229             return -1;
    230         }
    231         *target = PyInt_AsLong(src);
    232     }
    233     return 0;
    234 }
    235 
    236 static int
    237 _set_char(const char *name, char *target, PyObject *src, char dflt)
    238 {
    239     if (src == NULL)
    240         *target = dflt;
    241     else {
    242         *target = '\0';
    243         if (src != Py_None) {
    244             Py_ssize_t len;
    245             if (!PyString_Check(src)) {
    246                 PyErr_Format(PyExc_TypeError,
    247                     "\"%s\" must be string, not %.200s", name,
    248                     src->ob_type->tp_name);
    249                 return -1;
    250             }
    251             len = PyString_GET_SIZE(src);
    252             if (len > 1) {
    253                 PyErr_Format(PyExc_TypeError,
    254                     "\"%s\" must be an 1-character string",
    255                     name);
    256                 return -1;
    257             }
    258             if (len > 0)
    259                 *target = *PyString_AS_STRING(src);
    260         }
    261     }
    262     return 0;
    263 }
    264 
    265 static int
    266 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
    267 {
    268     if (src == NULL)
    269         *target = PyString_FromString(dflt);
    270     else {
    271         if (src == Py_None)
    272             *target = NULL;
    273         else if (!IS_BASESTRING(src)) {
    274             PyErr_Format(PyExc_TypeError,
    275                          "\"%s\" must be a string", name);
    276             return -1;
    277         }
    278         else {
    279             Py_XDECREF(*target);
    280             Py_INCREF(src);
    281             *target = src;
    282         }
    283     }
    284     return 0;
    285 }
    286 
    287 static int
    288 dialect_check_quoting(int quoting)
    289 {
    290     StyleDesc *qs = quote_styles;
    291 
    292     for (qs = quote_styles; qs->name; qs++) {
    293         if (qs->style == quoting)
    294             return 0;
    295     }
    296     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
    297     return -1;
    298 }
    299 
    300 #define D_OFF(x) offsetof(DialectObj, x)
    301 
    302 static struct PyMemberDef Dialect_memberlist[] = {
    303     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    304     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    305     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    306     { "strict",             T_INT, D_OFF(strict), READONLY },
    307     { NULL }
    308 };
    309 
    310 static PyGetSetDef Dialect_getsetlist[] = {
    311     { "escapechar",             (getter)Dialect_get_escapechar},
    312     { "lineterminator",         (getter)Dialect_get_lineterminator},
    313     { "quotechar",              (getter)Dialect_get_quotechar},
    314     { "quoting",                (getter)Dialect_get_quoting},
    315     {NULL},
    316 };
    317 
    318 static void
    319 Dialect_dealloc(DialectObj *self)
    320 {
    321     Py_XDECREF(self->lineterminator);
    322     Py_TYPE(self)->tp_free((PyObject *)self);
    323 }
    324 
    325 static char *dialect_kws[] = {
    326     "dialect",
    327     "delimiter",
    328     "doublequote",
    329     "escapechar",
    330     "lineterminator",
    331     "quotechar",
    332     "quoting",
    333     "skipinitialspace",
    334     "strict",
    335     NULL
    336 };
    337 
    338 static PyObject *
    339 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
    340 {
    341     DialectObj *self;
    342     PyObject *ret = NULL;
    343     PyObject *dialect = NULL;
    344     PyObject *delimiter = NULL;
    345     PyObject *doublequote = NULL;
    346     PyObject *escapechar = NULL;
    347     PyObject *lineterminator = NULL;
    348     PyObject *quotechar = NULL;
    349     PyObject *quoting = NULL;
    350     PyObject *skipinitialspace = NULL;
    351     PyObject *strict = NULL;
    352 
    353     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
    354                                      "|OOOOOOOOO", dialect_kws,
    355                                      &dialect,
    356                                      &delimiter,
    357                                      &doublequote,
    358                                      &escapechar,
    359                                      &lineterminator,
    360                                      &quotechar,
    361                                      &quoting,
    362                                      &skipinitialspace,
    363                                      &strict))
    364         return NULL;
    365 
    366     if (dialect != NULL) {
    367         if (IS_BASESTRING(dialect)) {
    368             dialect = get_dialect_from_registry(dialect);
    369             if (dialect == NULL)
    370                 return NULL;
    371         }
    372         else
    373             Py_INCREF(dialect);
    374         /* Can we reuse this instance? */
    375         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
    376             delimiter == 0 &&
    377             doublequote == 0 &&
    378             escapechar == 0 &&
    379             lineterminator == 0 &&
    380             quotechar == 0 &&
    381             quoting == 0 &&
    382             skipinitialspace == 0 &&
    383             strict == 0)
    384             return dialect;
    385     }
    386 
    387     self = (DialectObj *)type->tp_alloc(type, 0);
    388     if (self == NULL) {
    389         Py_XDECREF(dialect);
    390         return NULL;
    391     }
    392     self->lineterminator = NULL;
    393 
    394     Py_XINCREF(delimiter);
    395     Py_XINCREF(doublequote);
    396     Py_XINCREF(escapechar);
    397     Py_XINCREF(lineterminator);
    398     Py_XINCREF(quotechar);
    399     Py_XINCREF(quoting);
    400     Py_XINCREF(skipinitialspace);
    401     Py_XINCREF(strict);
    402     if (dialect != NULL) {
    403 #define DIALECT_GETATTR(v, n) \
    404         if (v == NULL) \
    405             v = PyObject_GetAttrString(dialect, n)
    406         DIALECT_GETATTR(delimiter, "delimiter");
    407         DIALECT_GETATTR(doublequote, "doublequote");
    408         DIALECT_GETATTR(escapechar, "escapechar");
    409         DIALECT_GETATTR(lineterminator, "lineterminator");
    410         DIALECT_GETATTR(quotechar, "quotechar");
    411         DIALECT_GETATTR(quoting, "quoting");
    412         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
    413         DIALECT_GETATTR(strict, "strict");
    414         PyErr_Clear();
    415     }
    416 
    417     /* check types and convert to C values */
    418 #define DIASET(meth, name, target, src, dflt) \
    419     if (meth(name, target, src, dflt)) \
    420         goto err
    421     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    422     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    423     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    424     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    425     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    426     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    427     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    428     DIASET(_set_bool, "strict", &self->strict, strict, 0);
    429 
    430     /* validate options */
    431     if (dialect_check_quoting(self->quoting))
    432         goto err;
    433     if (self->delimiter == 0) {
    434         PyErr_SetString(PyExc_TypeError,
    435                         "\"delimiter\" must be an 1-character string");
    436         goto err;
    437     }
    438     if (quotechar == Py_None && quoting == NULL)
    439         self->quoting = QUOTE_NONE;
    440     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
    441         PyErr_SetString(PyExc_TypeError,
    442                         "quotechar must be set if quoting enabled");
    443         goto err;
    444     }
    445     if (self->lineterminator == 0) {
    446         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
    447         goto err;
    448     }
    449 
    450     ret = (PyObject *)self;
    451     Py_INCREF(self);
    452 err:
    453     Py_XDECREF(self);
    454     Py_XDECREF(dialect);
    455     Py_XDECREF(delimiter);
    456     Py_XDECREF(doublequote);
    457     Py_XDECREF(escapechar);
    458     Py_XDECREF(lineterminator);
    459     Py_XDECREF(quotechar);
    460     Py_XDECREF(quoting);
    461     Py_XDECREF(skipinitialspace);
    462     Py_XDECREF(strict);
    463     return ret;
    464 }
    465 
    466 
    467 PyDoc_STRVAR(Dialect_Type_doc,
    468 "CSV dialect\n"
    469 "\n"
    470 "The Dialect type records CSV parsing and generation options.\n");
    471 
    472 static PyTypeObject Dialect_Type = {
    473     PyVarObject_HEAD_INIT(NULL, 0)
    474     "_csv.Dialect",                         /* tp_name */
    475     sizeof(DialectObj),                     /* tp_basicsize */
    476     0,                                      /* tp_itemsize */
    477     /*  methods  */
    478     (destructor)Dialect_dealloc,            /* tp_dealloc */
    479     (printfunc)0,                           /* tp_print */
    480     (getattrfunc)0,                         /* tp_getattr */
    481     (setattrfunc)0,                         /* tp_setattr */
    482     (cmpfunc)0,                             /* tp_compare */
    483     (reprfunc)0,                            /* tp_repr */
    484     0,                                      /* tp_as_number */
    485     0,                                      /* tp_as_sequence */
    486     0,                                      /* tp_as_mapping */
    487     (hashfunc)0,                            /* tp_hash */
    488     (ternaryfunc)0,                         /* tp_call */
    489     (reprfunc)0,                                /* tp_str */
    490     0,                                      /* tp_getattro */
    491     0,                                      /* tp_setattro */
    492     0,                                      /* tp_as_buffer */
    493     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    494     Dialect_Type_doc,                       /* tp_doc */
    495     0,                                      /* tp_traverse */
    496     0,                                      /* tp_clear */
    497     0,                                      /* tp_richcompare */
    498     0,                                      /* tp_weaklistoffset */
    499     0,                                      /* tp_iter */
    500     0,                                      /* tp_iternext */
    501     0,                                          /* tp_methods */
    502     Dialect_memberlist,                     /* tp_members */
    503     Dialect_getsetlist,                     /* tp_getset */
    504     0,                                          /* tp_base */
    505     0,                                          /* tp_dict */
    506     0,                                          /* tp_descr_get */
    507     0,                                          /* tp_descr_set */
    508     0,                                          /* tp_dictoffset */
    509     0,                                          /* tp_init */
    510     0,                                          /* tp_alloc */
    511     dialect_new,                                /* tp_new */
    512     0,                                          /* tp_free */
    513 };
    514 
    515 /*
    516  * Return an instance of the dialect type, given a Python instance or kwarg
    517  * description of the dialect
    518  */
    519 static PyObject *
    520 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
    521 {
    522     PyObject *ctor_args;
    523     PyObject *dialect;
    524 
    525     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
    526     if (ctor_args == NULL)
    527         return NULL;
    528     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
    529     Py_DECREF(ctor_args);
    530     return dialect;
    531 }
    532 
    533 /*
    534  * READER
    535  */
    536 static int
    537 parse_save_field(ReaderObj *self)
    538 {
    539     PyObject *field;
    540 
    541     field = PyString_FromStringAndSize(self->field, self->field_len);
    542     if (field == NULL)
    543         return -1;
    544     self->field_len = 0;
    545     if (self->numeric_field) {
    546         PyObject *tmp;
    547 
    548         self->numeric_field = 0;
    549         tmp = PyNumber_Float(field);
    550         if (tmp == NULL) {
    551             Py_DECREF(field);
    552             return -1;
    553         }
    554         Py_DECREF(field);
    555         field = tmp;
    556     }
    557     PyList_Append(self->fields, field);
    558     Py_DECREF(field);
    559     return 0;
    560 }
    561 
    562 static int
    563 parse_grow_buff(ReaderObj *self)
    564 {
    565     if (self->field_size == 0) {
    566         self->field_size = 4096;
    567         if (self->field != NULL)
    568             PyMem_Free(self->field);
    569         self->field = PyMem_Malloc(self->field_size);
    570     }
    571     else {
    572         if (self->field_size > INT_MAX / 2) {
    573             PyErr_NoMemory();
    574             return 0;
    575         }
    576         self->field_size *= 2;
    577         self->field = PyMem_Realloc(self->field, self->field_size);
    578     }
    579     if (self->field == NULL) {
    580         PyErr_NoMemory();
    581         return 0;
    582     }
    583     return 1;
    584 }
    585 
    586 static int
    587 parse_add_char(ReaderObj *self, char c)
    588 {
    589     if (self->field_len >= field_limit) {
    590         PyErr_Format(error_obj, "field larger than field limit (%ld)",
    591                      field_limit);
    592         return -1;
    593     }
    594     if (self->field_len == self->field_size && !parse_grow_buff(self))
    595         return -1;
    596     self->field[self->field_len++] = c;
    597     return 0;
    598 }
    599 
    600 static int
    601 parse_process_char(ReaderObj *self, char c)
    602 {
    603     DialectObj *dialect = self->dialect;
    604 
    605     switch (self->state) {
    606     case START_RECORD:
    607         /* start of record */
    608         if (c == '\0')
    609             /* empty line - return [] */
    610             break;
    611         else if (c == '\n' || c == '\r') {
    612             self->state = EAT_CRNL;
    613             break;
    614         }
    615         /* normal character - handle as START_FIELD */
    616         self->state = START_FIELD;
    617         /* fallthru */
    618     case START_FIELD:
    619         /* expecting field */
    620         if (c == '\n' || c == '\r' || c == '\0') {
    621             /* save empty field - return [fields] */
    622             if (parse_save_field(self) < 0)
    623                 return -1;
    624             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    625         }
    626         else if (c == dialect->quotechar &&
    627                  dialect->quoting != QUOTE_NONE) {
    628             /* start quoted field */
    629             self->state = IN_QUOTED_FIELD;
    630         }
    631         else if (c == dialect->escapechar) {
    632             /* possible escaped character */
    633             self->state = ESCAPED_CHAR;
    634         }
    635         else if (c == ' ' && dialect->skipinitialspace)
    636             /* ignore space at start of field */
    637             ;
    638         else if (c == dialect->delimiter) {
    639             /* save empty field */
    640             if (parse_save_field(self) < 0)
    641                 return -1;
    642         }
    643         else {
    644             /* begin new unquoted field */
    645             if (dialect->quoting == QUOTE_NONNUMERIC)
    646                 self->numeric_field = 1;
    647             if (parse_add_char(self, c) < 0)
    648                 return -1;
    649             self->state = IN_FIELD;
    650         }
    651         break;
    652 
    653     case ESCAPED_CHAR:
    654         if (c == '\0')
    655             c = '\n';
    656         if (parse_add_char(self, c) < 0)
    657             return -1;
    658         self->state = IN_FIELD;
    659         break;
    660 
    661     case IN_FIELD:
    662         /* in unquoted field */
    663         if (c == '\n' || c == '\r' || c == '\0') {
    664             /* end of line - return [fields] */
    665             if (parse_save_field(self) < 0)
    666                 return -1;
    667             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    668         }
    669         else if (c == dialect->escapechar) {
    670             /* possible escaped character */
    671             self->state = ESCAPED_CHAR;
    672         }
    673         else if (c == dialect->delimiter) {
    674             /* save field - wait for new field */
    675             if (parse_save_field(self) < 0)
    676                 return -1;
    677             self->state = START_FIELD;
    678         }
    679         else {
    680             /* normal character - save in field */
    681             if (parse_add_char(self, c) < 0)
    682                 return -1;
    683         }
    684         break;
    685 
    686     case IN_QUOTED_FIELD:
    687         /* in quoted field */
    688         if (c == '\0')
    689             ;
    690         else if (c == dialect->escapechar) {
    691             /* Possible escape character */
    692             self->state = ESCAPE_IN_QUOTED_FIELD;
    693         }
    694         else if (c == dialect->quotechar &&
    695                  dialect->quoting != QUOTE_NONE) {
    696             if (dialect->doublequote) {
    697                 /* doublequote; " represented by "" */
    698                 self->state = QUOTE_IN_QUOTED_FIELD;
    699             }
    700             else {
    701                 /* end of quote part of field */
    702                 self->state = IN_FIELD;
    703             }
    704         }
    705         else {
    706             /* normal character - save in field */
    707             if (parse_add_char(self, c) < 0)
    708                 return -1;
    709         }
    710         break;
    711 
    712     case ESCAPE_IN_QUOTED_FIELD:
    713         if (c == '\0')
    714             c = '\n';
    715         if (parse_add_char(self, c) < 0)
    716             return -1;
    717         self->state = IN_QUOTED_FIELD;
    718         break;
    719 
    720     case QUOTE_IN_QUOTED_FIELD:
    721         /* doublequote - seen a quote in an quoted field */
    722         if (dialect->quoting != QUOTE_NONE &&
    723             c == dialect->quotechar) {
    724             /* save "" as " */
    725             if (parse_add_char(self, c) < 0)
    726                 return -1;
    727             self->state = IN_QUOTED_FIELD;
    728         }
    729         else if (c == dialect->delimiter) {
    730             /* save field - wait for new field */
    731             if (parse_save_field(self) < 0)
    732                 return -1;
    733             self->state = START_FIELD;
    734         }
    735         else if (c == '\n' || c == '\r' || c == '\0') {
    736             /* end of line - return [fields] */
    737             if (parse_save_field(self) < 0)
    738                 return -1;
    739             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
    740         }
    741         else if (!dialect->strict) {
    742             if (parse_add_char(self, c) < 0)
    743                 return -1;
    744             self->state = IN_FIELD;
    745         }
    746         else {
    747             /* illegal */
    748             PyErr_Format(error_obj, "'%c' expected after '%c'",
    749                             dialect->delimiter,
    750                             dialect->quotechar);
    751             return -1;
    752         }
    753         break;
    754 
    755     case EAT_CRNL:
    756         if (c == '\n' || c == '\r')
    757             ;
    758         else if (c == '\0')
    759             self->state = START_RECORD;
    760         else {
    761             PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
    762             return -1;
    763         }
    764         break;
    765 
    766     }
    767     return 0;
    768 }
    769 
    770 static int
    771 parse_reset(ReaderObj *self)
    772 {
    773     Py_XDECREF(self->fields);
    774     self->fields = PyList_New(0);
    775     if (self->fields == NULL)
    776         return -1;
    777     self->field_len = 0;
    778     self->state = START_RECORD;
    779     self->numeric_field = 0;
    780     return 0;
    781 }
    782 
    783 static PyObject *
    784 Reader_iternext(ReaderObj *self)
    785 {
    786     PyObject *lineobj;
    787     PyObject *fields = NULL;
    788     char *line, c;
    789     int linelen;
    790 
    791     if (parse_reset(self) < 0)
    792         return NULL;
    793     do {
    794         lineobj = PyIter_Next(self->input_iter);
    795         if (lineobj == NULL) {
    796             /* End of input OR exception */
    797             if (!PyErr_Occurred() && (self->field_len != 0 ||
    798                                       self->state == IN_QUOTED_FIELD)) {
    799                 if (self->dialect->strict)
    800                     PyErr_SetString(error_obj, "unexpected end of data");
    801                 else if (parse_save_field(self) >= 0 )
    802                     break;
    803             }
    804             return NULL;
    805         }
    806         ++self->line_num;
    807 
    808         line = PyString_AsString(lineobj);
    809         linelen = PyString_Size(lineobj);
    810 
    811         if (line == NULL || linelen < 0) {
    812             Py_DECREF(lineobj);
    813             return NULL;
    814         }
    815         while (linelen--) {
    816             c = *line++;
    817             if (c == '\0') {
    818                 Py_DECREF(lineobj);
    819                 PyErr_Format(error_obj,
    820                              "line contains NULL byte");
    821                 goto err;
    822             }
    823             if (parse_process_char(self, c) < 0) {
    824                 Py_DECREF(lineobj);
    825                 goto err;
    826             }
    827         }
    828         Py_DECREF(lineobj);
    829         if (parse_process_char(self, 0) < 0)
    830             goto err;
    831     } while (self->state != START_RECORD);
    832 
    833     fields = self->fields;
    834     self->fields = NULL;
    835 err:
    836     return fields;
    837 }
    838 
    839 static void
    840 Reader_dealloc(ReaderObj *self)
    841 {
    842     PyObject_GC_UnTrack(self);
    843     Py_XDECREF(self->dialect);
    844     Py_XDECREF(self->input_iter);
    845     Py_XDECREF(self->fields);
    846     if (self->field != NULL)
    847         PyMem_Free(self->field);
    848     PyObject_GC_Del(self);
    849 }
    850 
    851 static int
    852 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
    853 {
    854     Py_VISIT(self->dialect);
    855     Py_VISIT(self->input_iter);
    856     Py_VISIT(self->fields);
    857     return 0;
    858 }
    859 
    860 static int
    861 Reader_clear(ReaderObj *self)
    862 {
    863     Py_CLEAR(self->dialect);
    864     Py_CLEAR(self->input_iter);
    865     Py_CLEAR(self->fields);
    866     return 0;
    867 }
    868 
    869 PyDoc_STRVAR(Reader_Type_doc,
    870 "CSV reader\n"
    871 "\n"
    872 "Reader objects are responsible for reading and parsing tabular data\n"
    873 "in CSV format.\n"
    874 );
    875 
    876 static struct PyMethodDef Reader_methods[] = {
    877     { NULL, NULL }
    878 };
    879 #define R_OFF(x) offsetof(ReaderObj, x)
    880 
    881 static struct PyMemberDef Reader_memberlist[] = {
    882     { "dialect", T_OBJECT, R_OFF(dialect), RO },
    883     { "line_num", T_ULONG, R_OFF(line_num), RO },
    884     { NULL }
    885 };
    886 
    887 
    888 static PyTypeObject Reader_Type = {
    889     PyVarObject_HEAD_INIT(NULL, 0)
    890     "_csv.reader",                          /*tp_name*/
    891     sizeof(ReaderObj),                      /*tp_basicsize*/
    892     0,                                      /*tp_itemsize*/
    893     /* methods */
    894     (destructor)Reader_dealloc,             /*tp_dealloc*/
    895     (printfunc)0,                           /*tp_print*/
    896     (getattrfunc)0,                         /*tp_getattr*/
    897     (setattrfunc)0,                         /*tp_setattr*/
    898     (cmpfunc)0,                             /*tp_compare*/
    899     (reprfunc)0,                            /*tp_repr*/
    900     0,                                      /*tp_as_number*/
    901     0,                                      /*tp_as_sequence*/
    902     0,                                      /*tp_as_mapping*/
    903     (hashfunc)0,                            /*tp_hash*/
    904     (ternaryfunc)0,                         /*tp_call*/
    905     (reprfunc)0,                                /*tp_str*/
    906     0,                                      /*tp_getattro*/
    907     0,                                      /*tp_setattro*/
    908     0,                                      /*tp_as_buffer*/
    909     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    910         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
    911     Reader_Type_doc,                        /*tp_doc*/
    912     (traverseproc)Reader_traverse,          /*tp_traverse*/
    913     (inquiry)Reader_clear,                  /*tp_clear*/
    914     0,                                      /*tp_richcompare*/
    915     0,                                      /*tp_weaklistoffset*/
    916     PyObject_SelfIter,                          /*tp_iter*/
    917     (getiterfunc)Reader_iternext,           /*tp_iternext*/
    918     Reader_methods,                         /*tp_methods*/
    919     Reader_memberlist,                      /*tp_members*/
    920     0,                                      /*tp_getset*/
    921 
    922 };
    923 
    924 static PyObject *
    925 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
    926 {
    927     PyObject * iterator, * dialect = NULL;
    928     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
    929 
    930     if (!self)
    931         return NULL;
    932 
    933     self->dialect = NULL;
    934     self->fields = NULL;
    935     self->input_iter = NULL;
    936     self->field = NULL;
    937     self->field_size = 0;
    938     self->line_num = 0;
    939 
    940     if (parse_reset(self) < 0) {
    941         Py_DECREF(self);
    942         return NULL;
    943     }
    944 
    945     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
    946         Py_DECREF(self);
    947         return NULL;
    948     }
    949     self->input_iter = PyObject_GetIter(iterator);
    950     if (self->input_iter == NULL) {
    951         PyErr_SetString(PyExc_TypeError,
    952                         "argument 1 must be an iterator");
    953         Py_DECREF(self);
    954         return NULL;
    955     }
    956     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
    957     if (self->dialect == NULL) {
    958         Py_DECREF(self);
    959         return NULL;
    960     }
    961 
    962     PyObject_GC_Track(self);
    963     return (PyObject *)self;
    964 }
    965 
    966 /*
    967  * WRITER
    968  */
    969 /* ---------------------------------------------------------------- */
    970 static void
    971 join_reset(WriterObj *self)
    972 {
    973     self->rec_len = 0;
    974     self->num_fields = 0;
    975 }
    976 
    977 #define MEM_INCR 32768
    978 
    979 /* Calculate new record length or append field to record.  Return new
    980  * record length.
    981  */
    982 static int
    983 join_append_data(WriterObj *self, char *field, int quote_empty,
    984                  int *quoted, int copy_phase)
    985 {
    986     DialectObj *dialect = self->dialect;
    987     int i, rec_len;
    988     char *lineterm;
    989 
    990 #define ADDCH(c) \
    991     do {\
    992         if (copy_phase) \
    993             self->rec[rec_len] = c;\
    994         rec_len++;\
    995     } while(0)
    996 
    997     lineterm = PyString_AsString(dialect->lineterminator);
    998     if (lineterm == NULL)
    999         return -1;
   1000 
   1001     rec_len = self->rec_len;
   1002 
   1003     /* If this is not the first field we need a field separator */
   1004     if (self->num_fields > 0)
   1005         ADDCH(dialect->delimiter);
   1006 
   1007     /* Handle preceding quote */
   1008     if (copy_phase && *quoted)
   1009         ADDCH(dialect->quotechar);
   1010 
   1011     /* Copy/count field data */
   1012     for (i = 0;; i++) {
   1013         char c = field[i];
   1014         int want_escape = 0;
   1015 
   1016         if (c == '\0')
   1017             break;
   1018 
   1019         if (c == dialect->delimiter ||
   1020             c == dialect->escapechar ||
   1021             c == dialect->quotechar ||
   1022             strchr(lineterm, c)) {
   1023             if (dialect->quoting == QUOTE_NONE)
   1024                 want_escape = 1;
   1025             else {
   1026                 if (c == dialect->quotechar) {
   1027                     if (dialect->doublequote)
   1028                         ADDCH(dialect->quotechar);
   1029                     else
   1030                         want_escape = 1;
   1031                 }
   1032                 if (!want_escape)
   1033                     *quoted = 1;
   1034             }
   1035             if (want_escape) {
   1036                 if (!dialect->escapechar) {
   1037                     PyErr_Format(error_obj,
   1038                                  "need to escape, but no escapechar set");
   1039                     return -1;
   1040                 }
   1041                 ADDCH(dialect->escapechar);
   1042             }
   1043         }
   1044         /* Copy field character into record buffer.
   1045          */
   1046         ADDCH(c);
   1047     }
   1048 
   1049     /* If field is empty check if it needs to be quoted.
   1050      */
   1051     if (i == 0 && quote_empty) {
   1052         if (dialect->quoting == QUOTE_NONE) {
   1053             PyErr_Format(error_obj,
   1054                          "single empty field record must be quoted");
   1055             return -1;
   1056         }
   1057         else
   1058             *quoted = 1;
   1059     }
   1060 
   1061     if (*quoted) {
   1062         if (copy_phase)
   1063             ADDCH(dialect->quotechar);
   1064         else
   1065             rec_len += 2;
   1066     }
   1067     return rec_len;
   1068 #undef ADDCH
   1069 }
   1070 
   1071 static int
   1072 join_check_rec_size(WriterObj *self, int rec_len)
   1073 {
   1074 
   1075     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
   1076         PyErr_NoMemory();
   1077         return 0;
   1078     }
   1079 
   1080     if (rec_len > self->rec_size) {
   1081         if (self->rec_size == 0) {
   1082             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
   1083             if (self->rec != NULL)
   1084                 PyMem_Free(self->rec);
   1085             self->rec = PyMem_Malloc(self->rec_size);
   1086         }
   1087         else {
   1088             char *old_rec = self->rec;
   1089 
   1090             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
   1091             self->rec = PyMem_Realloc(self->rec, self->rec_size);
   1092             if (self->rec == NULL)
   1093                 PyMem_Free(old_rec);
   1094         }
   1095         if (self->rec == NULL) {
   1096             PyErr_NoMemory();
   1097             return 0;
   1098         }
   1099     }
   1100     return 1;
   1101 }
   1102 
   1103 static int
   1104 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
   1105 {
   1106     int rec_len;
   1107 
   1108     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
   1109     if (rec_len < 0)
   1110         return 0;
   1111 
   1112     /* grow record buffer if necessary */
   1113     if (!join_check_rec_size(self, rec_len))
   1114         return 0;
   1115 
   1116     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
   1117     self->num_fields++;
   1118 
   1119     return 1;
   1120 }
   1121 
   1122 static int
   1123 join_append_lineterminator(WriterObj *self)
   1124 {
   1125     int terminator_len;
   1126     char *terminator;
   1127 
   1128     terminator_len = PyString_Size(self->dialect->lineterminator);
   1129     if (terminator_len == -1)
   1130         return 0;
   1131 
   1132     /* grow record buffer if necessary */
   1133     if (!join_check_rec_size(self, self->rec_len + terminator_len))
   1134         return 0;
   1135 
   1136     terminator = PyString_AsString(self->dialect->lineterminator);
   1137     if (terminator == NULL)
   1138         return 0;
   1139     memmove(self->rec + self->rec_len, terminator, terminator_len);
   1140     self->rec_len += terminator_len;
   1141 
   1142     return 1;
   1143 }
   1144 
   1145 PyDoc_STRVAR(csv_writerow_doc,
   1146 "writerow(sequence)\n"
   1147 "\n"
   1148 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
   1149 "elements will be converted to string.");
   1150 
   1151 static PyObject *
   1152 csv_writerow(WriterObj *self, PyObject *seq)
   1153 {
   1154     DialectObj *dialect = self->dialect;
   1155     int len, i;
   1156 
   1157     if (!PySequence_Check(seq))
   1158         return PyErr_Format(error_obj, "sequence expected");
   1159 
   1160     len = PySequence_Length(seq);
   1161     if (len < 0)
   1162         return NULL;
   1163 
   1164     /* Join all fields in internal buffer.
   1165      */
   1166     join_reset(self);
   1167     for (i = 0; i < len; i++) {
   1168         PyObject *field;
   1169         int append_ok;
   1170         int quoted;
   1171 
   1172         field = PySequence_GetItem(seq, i);
   1173         if (field == NULL)
   1174             return NULL;
   1175 
   1176         switch (dialect->quoting) {
   1177         case QUOTE_NONNUMERIC:
   1178             quoted = !PyNumber_Check(field);
   1179             break;
   1180         case QUOTE_ALL:
   1181             quoted = 1;
   1182             break;
   1183         default:
   1184             quoted = 0;
   1185             break;
   1186         }
   1187 
   1188         if (PyString_Check(field)) {
   1189             append_ok = join_append(self,
   1190                                     PyString_AS_STRING(field),
   1191                                     &quoted, len == 1);
   1192             Py_DECREF(field);
   1193         }
   1194         else if (field == Py_None) {
   1195             append_ok = join_append(self, "", &quoted, len == 1);
   1196             Py_DECREF(field);
   1197         }
   1198         else {
   1199             PyObject *str;
   1200 
   1201             if (PyFloat_Check(field)) {
   1202                 str = PyObject_Repr(field);
   1203             } else {
   1204                 str = PyObject_Str(field);
   1205             }
   1206             Py_DECREF(field);
   1207             if (str == NULL)
   1208                 return NULL;
   1209 
   1210             append_ok = join_append(self, PyString_AS_STRING(str),
   1211                                     &quoted, len == 1);
   1212             Py_DECREF(str);
   1213         }
   1214         if (!append_ok)
   1215             return NULL;
   1216     }
   1217 
   1218     /* Add line terminator.
   1219      */
   1220     if (!join_append_lineterminator(self))
   1221         return 0;
   1222 
   1223     return PyObject_CallFunction(self->writeline,
   1224                                  "(s#)", self->rec, self->rec_len);
   1225 }
   1226 
   1227 PyDoc_STRVAR(csv_writerows_doc,
   1228 "writerows(sequence of sequences)\n"
   1229 "\n"
   1230 "Construct and write a series of sequences to a csv file.  Non-string\n"
   1231 "elements will be converted to string.");
   1232 
   1233 static PyObject *
   1234 csv_writerows(WriterObj *self, PyObject *seqseq)
   1235 {
   1236     PyObject *row_iter, *row_obj, *result;
   1237 
   1238     row_iter = PyObject_GetIter(seqseq);
   1239     if (row_iter == NULL) {
   1240         PyErr_SetString(PyExc_TypeError,
   1241                         "writerows() argument must be iterable");
   1242         return NULL;
   1243     }
   1244     while ((row_obj = PyIter_Next(row_iter))) {
   1245         result = csv_writerow(self, row_obj);
   1246         Py_DECREF(row_obj);
   1247         if (!result) {
   1248             Py_DECREF(row_iter);
   1249             return NULL;
   1250         }
   1251         else
   1252              Py_DECREF(result);
   1253     }
   1254     Py_DECREF(row_iter);
   1255     if (PyErr_Occurred())
   1256         return NULL;
   1257     Py_INCREF(Py_None);
   1258     return Py_None;
   1259 }
   1260 
   1261 static struct PyMethodDef Writer_methods[] = {
   1262     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
   1263     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
   1264     { NULL, NULL }
   1265 };
   1266 
   1267 #define W_OFF(x) offsetof(WriterObj, x)
   1268 
   1269 static struct PyMemberDef Writer_memberlist[] = {
   1270     { "dialect", T_OBJECT, W_OFF(dialect), RO },
   1271     { NULL }
   1272 };
   1273 
   1274 static void
   1275 Writer_dealloc(WriterObj *self)
   1276 {
   1277     PyObject_GC_UnTrack(self);
   1278     Py_XDECREF(self->dialect);
   1279     Py_XDECREF(self->writeline);
   1280     if (self->rec != NULL)
   1281         PyMem_Free(self->rec);
   1282     PyObject_GC_Del(self);
   1283 }
   1284 
   1285 static int
   1286 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
   1287 {
   1288     Py_VISIT(self->dialect);
   1289     Py_VISIT(self->writeline);
   1290     return 0;
   1291 }
   1292 
   1293 static int
   1294 Writer_clear(WriterObj *self)
   1295 {
   1296     Py_CLEAR(self->dialect);
   1297     Py_CLEAR(self->writeline);
   1298     return 0;
   1299 }
   1300 
   1301 PyDoc_STRVAR(Writer_Type_doc,
   1302 "CSV writer\n"
   1303 "\n"
   1304 "Writer objects are responsible for generating tabular data\n"
   1305 "in CSV format from sequence input.\n"
   1306 );
   1307 
   1308 static PyTypeObject Writer_Type = {
   1309     PyVarObject_HEAD_INIT(NULL, 0)
   1310     "_csv.writer",                          /*tp_name*/
   1311     sizeof(WriterObj),                      /*tp_basicsize*/
   1312     0,                                      /*tp_itemsize*/
   1313     /* methods */
   1314     (destructor)Writer_dealloc,             /*tp_dealloc*/
   1315     (printfunc)0,                           /*tp_print*/
   1316     (getattrfunc)0,                         /*tp_getattr*/
   1317     (setattrfunc)0,                         /*tp_setattr*/
   1318     (cmpfunc)0,                             /*tp_compare*/
   1319     (reprfunc)0,                            /*tp_repr*/
   1320     0,                                      /*tp_as_number*/
   1321     0,                                      /*tp_as_sequence*/
   1322     0,                                      /*tp_as_mapping*/
   1323     (hashfunc)0,                            /*tp_hash*/
   1324     (ternaryfunc)0,                         /*tp_call*/
   1325     (reprfunc)0,                            /*tp_str*/
   1326     0,                                      /*tp_getattro*/
   1327     0,                                      /*tp_setattro*/
   1328     0,                                      /*tp_as_buffer*/
   1329     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
   1330         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
   1331     Writer_Type_doc,
   1332     (traverseproc)Writer_traverse,          /*tp_traverse*/
   1333     (inquiry)Writer_clear,                  /*tp_clear*/
   1334     0,                                      /*tp_richcompare*/
   1335     0,                                      /*tp_weaklistoffset*/
   1336     (getiterfunc)0,                         /*tp_iter*/
   1337     (getiterfunc)0,                         /*tp_iternext*/
   1338     Writer_methods,                         /*tp_methods*/
   1339     Writer_memberlist,                      /*tp_members*/
   1340     0,                                      /*tp_getset*/
   1341 };
   1342 
   1343 static PyObject *
   1344 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
   1345 {
   1346     PyObject * output_file, * dialect = NULL;
   1347     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
   1348 
   1349     if (!self)
   1350         return NULL;
   1351 
   1352     self->dialect = NULL;
   1353     self->writeline = NULL;
   1354 
   1355     self->rec = NULL;
   1356     self->rec_size = 0;
   1357     self->rec_len = 0;
   1358     self->num_fields = 0;
   1359 
   1360     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
   1361         Py_DECREF(self);
   1362         return NULL;
   1363     }
   1364     self->writeline = PyObject_GetAttrString(output_file, "write");
   1365     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
   1366         PyErr_SetString(PyExc_TypeError,
   1367                         "argument 1 must have a \"write\" method");
   1368         Py_DECREF(self);
   1369         return NULL;
   1370     }
   1371     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
   1372     if (self->dialect == NULL) {
   1373         Py_DECREF(self);
   1374         return NULL;
   1375     }
   1376     PyObject_GC_Track(self);
   1377     return (PyObject *)self;
   1378 }
   1379 
   1380 /*
   1381  * DIALECT REGISTRY
   1382  */
   1383 static PyObject *
   1384 csv_list_dialects(PyObject *module, PyObject *args)
   1385 {
   1386     return PyDict_Keys(dialects);
   1387 }
   1388 
   1389 static PyObject *
   1390 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
   1391 {
   1392     PyObject *name_obj, *dialect_obj = NULL;
   1393     PyObject *dialect;
   1394 
   1395     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
   1396         return NULL;
   1397     if (!IS_BASESTRING(name_obj)) {
   1398         PyErr_SetString(PyExc_TypeError,
   1399                         "dialect name must be a string or unicode");
   1400         return NULL;
   1401     }
   1402     dialect = _call_dialect(dialect_obj, kwargs);
   1403     if (dialect == NULL)
   1404         return NULL;
   1405     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
   1406         Py_DECREF(dialect);
   1407         return NULL;
   1408     }
   1409     Py_DECREF(dialect);
   1410     Py_INCREF(Py_None);
   1411     return Py_None;
   1412 }
   1413 
   1414 static PyObject *
   1415 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
   1416 {
   1417     if (PyDict_DelItem(dialects, name_obj) < 0)
   1418         return PyErr_Format(error_obj, "unknown dialect");
   1419     Py_INCREF(Py_None);
   1420     return Py_None;
   1421 }
   1422 
   1423 static PyObject *
   1424 csv_get_dialect(PyObject *module, PyObject *name_obj)
   1425 {
   1426     return get_dialect_from_registry(name_obj);
   1427 }
   1428 
   1429 static PyObject *
   1430 csv_field_size_limit(PyObject *module, PyObject *args)
   1431 {
   1432     PyObject *new_limit = NULL;
   1433     long old_limit = field_limit;
   1434 
   1435     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
   1436         return NULL;
   1437     if (new_limit != NULL) {
   1438         if (!PyInt_Check(new_limit)) {
   1439             PyErr_Format(PyExc_TypeError,
   1440                          "limit must be an integer");
   1441             return NULL;
   1442         }
   1443         field_limit = PyInt_AsLong(new_limit);
   1444     }
   1445     return PyInt_FromLong(old_limit);
   1446 }
   1447 
   1448 /*
   1449  * MODULE
   1450  */
   1451 
   1452 PyDoc_STRVAR(csv_module_doc,
   1453 "CSV parsing and writing.\n"
   1454 "\n"
   1455 "This module provides classes that assist in the reading and writing\n"
   1456 "of Comma Separated Value (CSV) files, and implements the interface\n"
   1457 "described by PEP 305.  Although many CSV files are simple to parse,\n"
   1458 "the format is not formally defined by a stable specification and\n"
   1459 "is subtle enough that parsing lines of a CSV file with something\n"
   1460 "like line.split(\",\") is bound to fail.  The module supports three\n"
   1461 "basic APIs: reading, writing, and registration of dialects.\n"
   1462 "\n"
   1463 "\n"
   1464 "DIALECT REGISTRATION:\n"
   1465 "\n"
   1466 "Readers and writers support a dialect argument, which is a convenient\n"
   1467 "handle on a group of settings.  When the dialect argument is a string,\n"
   1468 "it identifies one of the dialects previously registered with the module.\n"
   1469 "If it is a class or instance, the attributes of the argument are used as\n"
   1470 "the settings for the reader or writer:\n"
   1471 "\n"
   1472 "    class excel:\n"
   1473 "        delimiter = ','\n"
   1474 "        quotechar = '\"'\n"
   1475 "        escapechar = None\n"
   1476 "        doublequote = True\n"
   1477 "        skipinitialspace = False\n"
   1478 "        lineterminator = '\\r\\n'\n"
   1479 "        quoting = QUOTE_MINIMAL\n"
   1480 "\n"
   1481 "SETTINGS:\n"
   1482 "\n"
   1483 "    * quotechar - specifies a one-character string to use as the \n"
   1484 "        quoting character.  It defaults to '\"'.\n"
   1485 "    * delimiter - specifies a one-character string to use as the \n"
   1486 "        field separator.  It defaults to ','.\n"
   1487 "    * skipinitialspace - specifies how to interpret whitespace which\n"
   1488 "        immediately follows a delimiter.  It defaults to False, which\n"
   1489 "        means that whitespace immediately following a delimiter is part\n"
   1490 "        of the following field.\n"
   1491 "    * lineterminator -  specifies the character sequence which should \n"
   1492 "        terminate rows.\n"
   1493 "    * quoting - controls when quotes should be generated by the writer.\n"
   1494 "        It can take on any of the following module constants:\n"
   1495 "\n"
   1496 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
   1497 "            field contains either the quotechar or the delimiter\n"
   1498 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
   1499 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
   1500 "            fields which do not parse as integers or floating point\n"
   1501 "            numbers.\n"
   1502 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
   1503 "    * escapechar - specifies a one-character string used to escape \n"
   1504 "        the delimiter when quoting is set to QUOTE_NONE.\n"
   1505 "    * doublequote - controls the handling of quotes inside fields.  When\n"
   1506 "        True, two consecutive quotes are interpreted as one during read,\n"
   1507 "        and when writing, each quote character embedded in the data is\n"
   1508 "        written as two quotes\n");
   1509 
   1510 PyDoc_STRVAR(csv_reader_doc,
   1511 "    csv_reader = reader(iterable [, dialect='excel']\n"
   1512 "                        [optional keyword args])\n"
   1513 "    for row in csv_reader:\n"
   1514 "        process(row)\n"
   1515 "\n"
   1516 "The \"iterable\" argument can be any object that returns a line\n"
   1517 "of input for each iteration, such as a file object or a list.  The\n"
   1518 "optional \"dialect\" parameter is discussed below.  The function\n"
   1519 "also accepts optional keyword arguments which override settings\n"
   1520 "provided by the dialect.\n"
   1521 "\n"
   1522 "The returned object is an iterator.  Each iteration returns a row\n"
   1523 "of the CSV file (which can span multiple input lines):\n");
   1524 
   1525 PyDoc_STRVAR(csv_writer_doc,
   1526 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
   1527 "                            [optional keyword args])\n"
   1528 "    for row in sequence:\n"
   1529 "        csv_writer.writerow(row)\n"
   1530 "\n"
   1531 "    [or]\n"
   1532 "\n"
   1533 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
   1534 "                            [optional keyword args])\n"
   1535 "    csv_writer.writerows(rows)\n"
   1536 "\n"
   1537 "The \"fileobj\" argument can be any object that supports the file API.\n");
   1538 
   1539 PyDoc_STRVAR(csv_list_dialects_doc,
   1540 "Return a list of all know dialect names.\n"
   1541 "    names = csv.list_dialects()");
   1542 
   1543 PyDoc_STRVAR(csv_get_dialect_doc,
   1544 "Return the dialect instance associated with name.\n"
   1545 "    dialect = csv.get_dialect(name)");
   1546 
   1547 PyDoc_STRVAR(csv_register_dialect_doc,
   1548 "Create a mapping from a string name to a dialect class.\n"
   1549 "    dialect = csv.register_dialect(name, dialect)");
   1550 
   1551 PyDoc_STRVAR(csv_unregister_dialect_doc,
   1552 "Delete the name/dialect mapping associated with a string name.\n"
   1553 "    csv.unregister_dialect(name)");
   1554 
   1555 PyDoc_STRVAR(csv_field_size_limit_doc,
   1556 "Sets an upper limit on parsed fields.\n"
   1557 "    csv.field_size_limit([limit])\n"
   1558 "\n"
   1559 "Returns old limit. If limit is not given, no new limit is set and\n"
   1560 "the old limit is returned");
   1561 
   1562 static struct PyMethodDef csv_methods[] = {
   1563     { "reader", (PyCFunction)csv_reader,
   1564         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
   1565     { "writer", (PyCFunction)csv_writer,
   1566         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
   1567     { "list_dialects", (PyCFunction)csv_list_dialects,
   1568         METH_NOARGS, csv_list_dialects_doc},
   1569     { "register_dialect", (PyCFunction)csv_register_dialect,
   1570         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
   1571     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
   1572         METH_O, csv_unregister_dialect_doc},
   1573     { "get_dialect", (PyCFunction)csv_get_dialect,
   1574         METH_O, csv_get_dialect_doc},
   1575     { "field_size_limit", (PyCFunction)csv_field_size_limit,
   1576         METH_VARARGS, csv_field_size_limit_doc},
   1577     { NULL, NULL }
   1578 };
   1579 
   1580 PyMODINIT_FUNC
   1581 init_csv(void)
   1582 {
   1583     PyObject *module;
   1584     StyleDesc *style;
   1585 
   1586     if (PyType_Ready(&Dialect_Type) < 0)
   1587         return;
   1588 
   1589     if (PyType_Ready(&Reader_Type) < 0)
   1590         return;
   1591 
   1592     if (PyType_Ready(&Writer_Type) < 0)
   1593         return;
   1594 
   1595     /* Create the module and add the functions */
   1596     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
   1597     if (module == NULL)
   1598         return;
   1599 
   1600     /* Add version to the module. */
   1601     if (PyModule_AddStringConstant(module, "__version__",
   1602                                    MODULE_VERSION) == -1)
   1603         return;
   1604 
   1605     /* Add _dialects dictionary */
   1606     dialects = PyDict_New();
   1607     if (dialects == NULL)
   1608         return;
   1609     if (PyModule_AddObject(module, "_dialects", dialects))
   1610         return;
   1611 
   1612     /* Add quote styles into dictionary */
   1613     for (style = quote_styles; style->name; style++) {
   1614         if (PyModule_AddIntConstant(module, style->name,
   1615                                     style->style) == -1)
   1616             return;
   1617     }
   1618 
   1619     /* Add the Dialect type */
   1620     Py_INCREF(&Dialect_Type);
   1621     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
   1622         return;
   1623 
   1624     /* Add the CSV exception object to the module. */
   1625     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
   1626     if (error_obj == NULL)
   1627         return;
   1628     PyModule_AddObject(module, "Error", error_obj);
   1629 }
   1630