Home | History | Annotate | Download | only in _io
      1 #define PY_SSIZE_T_CLEAN
      2 #include "Python.h"
      3 #include "structmember.h"
      4 #include "accu.h"
      5 #include "_iomodule.h"
      6 
      7 /* Implementation note: the buffer is always at least one character longer
      8    than the enclosed string, for proper functioning of _PyIO_find_line_ending.
      9 */
     10 
/* Values stored in stringio.state: STATE_REALIZED means the contents live in
   the UCS4 buffer, STATE_ACCUMULATING means they live in the embedded
   _PyAccu accumulator. */
#define STATE_REALIZED 1
#define STATE_ACCUMULATING 2
     13 
     14 /*[clinic input]
     15 module _io
     16 class _io.StringIO "stringio *" "&PyStringIO_Type"
     17 [clinic start generated code]*/
     18 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
     19 
/* Instance layout of StringIO objects. */
typedef struct {
    PyObject_HEAD
    Py_UCS4 *buf;           /* UCS4 character storage (see buf_size) */
    Py_ssize_t pos;         /* current position, as reported by tell() */
    Py_ssize_t string_size; /* length of the contents, in characters */
    size_t buf_size;        /* number of Py_UCS4 slots allocated for buf */

    /* The stringio object can be in two states: accumulating or realized.
       In accumulating state, the internal buffer contains nothing and
       the contents are given by the embedded _PyAccu structure.
       In realized state, the internal buffer is meaningful and the
       _PyAccu is destroyed.
    */
    int state;              /* STATE_REALIZED or STATE_ACCUMULATING */
    _PyAccu accu;

    char ok; /* initialized? */
    char closed;            /* set to 1 by close(), reset to 0 by __init__ */
    char readuniversal;     /* true when newline is None or "" */
    char readtranslate;     /* true when newline is None */
    PyObject *decoder;      /* newline decoder, created only if readuniversal */
    PyObject *readnl;       /* newline argument as a str, NULL for None */
    PyObject *writenl;      /* replacement for "\n" on write, or NULL */

    PyObject *dict;         /* copied into the pickled state by __getstate__ */
    PyObject *weakreflist;  /* cleared through PyObject_ClearWeakRefs() */
} stringio;
     47 
     48 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
     49 
     50 #define CHECK_INITIALIZED(self) \
     51     if (self->ok <= 0) { \
     52         PyErr_SetString(PyExc_ValueError, \
     53             "I/O operation on uninitialized object"); \
     54         return NULL; \
     55     }
     56 
     57 #define CHECK_CLOSED(self) \
     58     if (self->closed) { \
     59         PyErr_SetString(PyExc_ValueError, \
     60             "I/O operation on closed file"); \
     61         return NULL; \
     62     }
     63 
     64 #define ENSURE_REALIZED(self) \
     65     if (realize(self) < 0) { \
     66         return NULL; \
     67     }
     68 
     69 
     70 /* Internal routine for changing the size, in terms of characters, of the
     71    buffer of StringIO objects.  The caller should ensure that the 'size'
     72    argument is non-negative.  Returns 0 on success, -1 otherwise. */
     73 static int
     74 resize_buffer(stringio *self, size_t size)
     75 {
     76     /* Here, unsigned types are used to avoid dealing with signed integer
     77        overflow, which is undefined in C. */
     78     size_t alloc = self->buf_size;
     79     Py_UCS4 *new_buf = NULL;
     80 
     81     assert(self->buf != NULL);
     82 
     83     /* Reserve one more char for line ending detection. */
     84     size = size + 1;
     85     /* For simplicity, stay in the range of the signed type. Anyway, Python
     86        doesn't allow strings to be longer than this. */
     87     if (size > PY_SSIZE_T_MAX)
     88         goto overflow;
     89 
     90     if (size < alloc / 2) {
     91         /* Major downsize; resize down to exact size. */
     92         alloc = size + 1;
     93     }
     94     else if (size < alloc) {
     95         /* Within allocated size; quick exit */
     96         return 0;
     97     }
     98     else if (size <= alloc * 1.125) {
     99         /* Moderate upsize; overallocate similar to list_resize() */
    100         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
    101     }
    102     else {
    103         /* Major upsize; resize up to exact size */
    104         alloc = size + 1;
    105     }
    106 
    107     if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
    108         goto overflow;
    109     new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
    110     if (new_buf == NULL) {
    111         PyErr_NoMemory();
    112         return -1;
    113     }
    114     self->buf_size = alloc;
    115     self->buf = new_buf;
    116 
    117     return 0;
    118 
    119   overflow:
    120     PyErr_SetString(PyExc_OverflowError,
    121                     "new buffer size too large");
    122     return -1;
    123 }
    124 
    125 static PyObject *
    126 make_intermediate(stringio *self)
    127 {
    128     PyObject *intermediate = _PyAccu_Finish(&self->accu);
    129     self->state = STATE_REALIZED;
    130     if (intermediate == NULL)
    131         return NULL;
    132     if (_PyAccu_Init(&self->accu) ||
    133         _PyAccu_Accumulate(&self->accu, intermediate)) {
    134         Py_DECREF(intermediate);
    135         return NULL;
    136     }
    137     self->state = STATE_ACCUMULATING;
    138     return intermediate;
    139 }
    140 
    141 static int
    142 realize(stringio *self)
    143 {
    144     Py_ssize_t len;
    145     PyObject *intermediate;
    146 
    147     if (self->state == STATE_REALIZED)
    148         return 0;
    149     assert(self->state == STATE_ACCUMULATING);
    150     self->state = STATE_REALIZED;
    151 
    152     intermediate = _PyAccu_Finish(&self->accu);
    153     if (intermediate == NULL)
    154         return -1;
    155 
    156     /* Append the intermediate string to the internal buffer.
    157        The length should be equal to the current cursor position.
    158      */
    159     len = PyUnicode_GET_LENGTH(intermediate);
    160     if (resize_buffer(self, len) < 0) {
    161         Py_DECREF(intermediate);
    162         return -1;
    163     }
    164     if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
    165         Py_DECREF(intermediate);
    166         return -1;
    167     }
    168 
    169     Py_DECREF(intermediate);
    170     return 0;
    171 }
    172 
    173 /* Internal routine for writing a whole PyUnicode object to the buffer of a
    174    StringIO object. Returns 0 on success, or -1 on error. */
    175 static Py_ssize_t
    176 write_str(stringio *self, PyObject *obj)
    177 {
    178     Py_ssize_t len;
    179     PyObject *decoded = NULL;
    180 
    181     assert(self->buf != NULL);
    182     assert(self->pos >= 0);
    183 
    184     if (self->decoder != NULL) {
    185         decoded = _PyIncrementalNewlineDecoder_decode(
    186             self->decoder, obj, 1 /* always final */);
    187     }
    188     else {
    189         decoded = obj;
    190         Py_INCREF(decoded);
    191     }
    192     if (self->writenl) {
    193         PyObject *translated = PyUnicode_Replace(
    194             decoded, _PyIO_str_nl, self->writenl, -1);
    195         Py_DECREF(decoded);
    196         decoded = translated;
    197     }
    198     if (decoded == NULL)
    199         return -1;
    200 
    201     assert(PyUnicode_Check(decoded));
    202     if (PyUnicode_READY(decoded)) {
    203         Py_DECREF(decoded);
    204         return -1;
    205     }
    206     len = PyUnicode_GET_LENGTH(decoded);
    207     assert(len >= 0);
    208 
    209     /* This overflow check is not strictly necessary. However, it avoids us to
    210        deal with funky things like comparing an unsigned and a signed
    211        integer. */
    212     if (self->pos > PY_SSIZE_T_MAX - len) {
    213         PyErr_SetString(PyExc_OverflowError,
    214                         "new position too large");
    215         goto fail;
    216     }
    217 
    218     if (self->state == STATE_ACCUMULATING) {
    219         if (self->string_size == self->pos) {
    220             if (_PyAccu_Accumulate(&self->accu, decoded))
    221                 goto fail;
    222             goto success;
    223         }
    224         if (realize(self))
    225             goto fail;
    226     }
    227 
    228     if (self->pos + len > self->string_size) {
    229         if (resize_buffer(self, self->pos + len) < 0)
    230             goto fail;
    231     }
    232 
    233     if (self->pos > self->string_size) {
    234         /* In case of overseek, pad with null bytes the buffer region between
    235            the end of stream and the current position.
    236 
    237           0   lo      string_size                           hi
    238           |   |<---used--->|<----------available----------->|
    239           |   |            <--to pad-->|<---to write--->    |
    240           0   buf                   position
    241 
    242         */
    243         memset(self->buf + self->string_size, '\0',
    244                (self->pos - self->string_size) * sizeof(Py_UCS4));
    245     }
    246 
    247     /* Copy the data to the internal buffer, overwriting some of the
    248        existing data if self->pos < self->string_size. */
    249     if (!PyUnicode_AsUCS4(decoded,
    250                           self->buf + self->pos,
    251                           self->buf_size - self->pos,
    252                           0))
    253         goto fail;
    254 
    255 success:
    256     /* Set the new length of the internal string if it has changed. */
    257     self->pos += len;
    258     if (self->string_size < self->pos)
    259         self->string_size = self->pos;
    260 
    261     Py_DECREF(decoded);
    262     return 0;
    263 
    264 fail:
    265     Py_XDECREF(decoded);
    266     return -1;
    267 }
    268 
    269 /*[clinic input]
    270 _io.StringIO.getvalue
    271 
    272 Retrieve the entire contents of the object.
    273 [clinic start generated code]*/
    274 
    275 static PyObject *
    276 _io_StringIO_getvalue_impl(stringio *self)
    277 /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
    278 {
    279     CHECK_INITIALIZED(self);
    280     CHECK_CLOSED(self);
    281     if (self->state == STATE_ACCUMULATING)
    282         return make_intermediate(self);
    283     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
    284                                      self->string_size);
    285 }
    286 
    287 /*[clinic input]
    288 _io.StringIO.tell
    289 
    290 Tell the current file position.
    291 [clinic start generated code]*/
    292 
    293 static PyObject *
    294 _io_StringIO_tell_impl(stringio *self)
    295 /*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
    296 {
    297     CHECK_INITIALIZED(self);
    298     CHECK_CLOSED(self);
    299     return PyLong_FromSsize_t(self->pos);
    300 }
    301 
    302 /*[clinic input]
    303 _io.StringIO.read
    304     size as arg: object = None
    305     /
    306 
    307 Read at most size characters, returned as a string.
    308 
    309 If the argument is negative or omitted, read until EOF
    310 is reached. Return an empty string at EOF.
    311 [clinic start generated code]*/
    312 
    313 static PyObject *
    314 _io_StringIO_read_impl(stringio *self, PyObject *arg)
    315 /*[clinic end generated code: output=3676864773746f68 input=9a319015f6f3965c]*/
    316 {
    317     Py_ssize_t size, n;
    318     Py_UCS4 *output;
    319 
    320     CHECK_INITIALIZED(self);
    321     CHECK_CLOSED(self);
    322 
    323     if (PyNumber_Check(arg)) {
    324         size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
    325         if (size == -1 && PyErr_Occurred())
    326             return NULL;
    327     }
    328     else if (arg == Py_None) {
    329         /* Read until EOF is reached, by default. */
    330         size = -1;
    331     }
    332     else {
    333         PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
    334                      Py_TYPE(arg)->tp_name);
    335         return NULL;
    336     }
    337 
    338     /* adjust invalid sizes */
    339     n = self->string_size - self->pos;
    340     if (size < 0 || size > n) {
    341         size = n;
    342         if (size < 0)
    343             size = 0;
    344     }
    345 
    346     /* Optimization for seek(0); read() */
    347     if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
    348         PyObject *result = make_intermediate(self);
    349         self->pos = self->string_size;
    350         return result;
    351     }
    352 
    353     ENSURE_REALIZED(self);
    354     output = self->buf + self->pos;
    355     self->pos += size;
    356     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
    357 }
    358 
    359 /* Internal helper, used by stringio_readline and stringio_iternext */
    360 static PyObject *
    361 _stringio_readline(stringio *self, Py_ssize_t limit)
    362 {
    363     Py_UCS4 *start, *end, old_char;
    364     Py_ssize_t len, consumed;
    365 
    366     /* In case of overseek, return the empty string */
    367     if (self->pos >= self->string_size)
    368         return PyUnicode_New(0, 0);
    369 
    370     start = self->buf + self->pos;
    371     if (limit < 0 || limit > self->string_size - self->pos)
    372         limit = self->string_size - self->pos;
    373 
    374     end = start + limit;
    375     old_char = *end;
    376     *end = '\0';
    377     len = _PyIO_find_line_ending(
    378         self->readtranslate, self->readuniversal, self->readnl,
    379         PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
    380     *end = old_char;
    381     /* If we haven't found any line ending, we just return everything
    382        (`consumed` is ignored). */
    383     if (len < 0)
    384         len = limit;
    385     self->pos += len;
    386     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
    387 }
    388 
    389 /*[clinic input]
    390 _io.StringIO.readline
    391     size as arg: object = None
    392     /
    393 
    394 Read until newline or EOF.
    395 
    396 Returns an empty string if EOF is hit immediately.
    397 [clinic start generated code]*/
    398 
    399 static PyObject *
    400 _io_StringIO_readline_impl(stringio *self, PyObject *arg)
    401 /*[clinic end generated code: output=99fdcac03a3dee81 input=e0e0ed4042040176]*/
    402 {
    403     Py_ssize_t limit = -1;
    404 
    405     CHECK_INITIALIZED(self);
    406     CHECK_CLOSED(self);
    407     ENSURE_REALIZED(self);
    408 
    409     if (PyNumber_Check(arg)) {
    410         limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
    411         if (limit == -1 && PyErr_Occurred())
    412             return NULL;
    413     }
    414     else if (arg != Py_None) {
    415         PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
    416                      Py_TYPE(arg)->tp_name);
    417         return NULL;
    418     }
    419     return _stringio_readline(self, limit);
    420 }
    421 
    422 static PyObject *
    423 stringio_iternext(stringio *self)
    424 {
    425     PyObject *line;
    426 
    427     CHECK_INITIALIZED(self);
    428     CHECK_CLOSED(self);
    429     ENSURE_REALIZED(self);
    430 
    431     if (Py_TYPE(self) == &PyStringIO_Type) {
    432         /* Skip method call overhead for speed */
    433         line = _stringio_readline(self, -1);
    434     }
    435     else {
    436         /* XXX is subclassing StringIO really supported? */
    437         line = PyObject_CallMethodObjArgs((PyObject *)self,
    438                                            _PyIO_str_readline, NULL);
    439         if (line && !PyUnicode_Check(line)) {
    440             PyErr_Format(PyExc_IOError,
    441                          "readline() should have returned a str object, "
    442                          "not '%.200s'", Py_TYPE(line)->tp_name);
    443             Py_DECREF(line);
    444             return NULL;
    445         }
    446     }
    447 
    448     if (line == NULL)
    449         return NULL;
    450 
    451     if (PyUnicode_GET_LENGTH(line) == 0) {
    452         /* Reached EOF */
    453         Py_DECREF(line);
    454         return NULL;
    455     }
    456 
    457     return line;
    458 }
    459 
    460 /*[clinic input]
    461 _io.StringIO.truncate
    462     pos as arg: object = None
    463     /
    464 
    465 Truncate size to pos.
    466 
    467 The pos argument defaults to the current file position, as
    468 returned by tell().  The current file position is unchanged.
    469 Returns the new absolute position.
    470 [clinic start generated code]*/
    471 
    472 static PyObject *
    473 _io_StringIO_truncate_impl(stringio *self, PyObject *arg)
    474 /*[clinic end generated code: output=6072439c2b01d306 input=748619a494ba53ad]*/
    475 {
    476     Py_ssize_t size;
    477 
    478     CHECK_INITIALIZED(self);
    479     CHECK_CLOSED(self);
    480 
    481     if (PyNumber_Check(arg)) {
    482         size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
    483         if (size == -1 && PyErr_Occurred())
    484             return NULL;
    485     }
    486     else if (arg == Py_None) {
    487         /* Truncate to current position if no argument is passed. */
    488         size = self->pos;
    489     }
    490     else {
    491         PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
    492                      Py_TYPE(arg)->tp_name);
    493         return NULL;
    494     }
    495 
    496     if (size < 0) {
    497         PyErr_Format(PyExc_ValueError,
    498                      "Negative size value %zd", size);
    499         return NULL;
    500     }
    501 
    502     if (size < self->string_size) {
    503         ENSURE_REALIZED(self);
    504         if (resize_buffer(self, size) < 0)
    505             return NULL;
    506         self->string_size = size;
    507     }
    508 
    509     return PyLong_FromSsize_t(size);
    510 }
    511 
    512 /*[clinic input]
    513 _io.StringIO.seek
    514     pos: Py_ssize_t
    515     whence: int = 0
    516     /
    517 
    518 Change stream position.
    519 
    520 Seek to character offset pos relative to position indicated by whence:
    521     0  Start of stream (the default).  pos should be >= 0;
    522     1  Current position - pos must be 0;
    523     2  End of stream - pos must be 0.
    524 Returns the new absolute position.
    525 [clinic start generated code]*/
    526 
    527 static PyObject *
    528 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
    529 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
    530 {
    531     CHECK_INITIALIZED(self);
    532     CHECK_CLOSED(self);
    533 
    534     if (whence != 0 && whence != 1 && whence != 2) {
    535         PyErr_Format(PyExc_ValueError,
    536                      "Invalid whence (%i, should be 0, 1 or 2)", whence);
    537         return NULL;
    538     }
    539     else if (pos < 0 && whence == 0) {
    540         PyErr_Format(PyExc_ValueError,
    541                      "Negative seek position %zd", pos);
    542         return NULL;
    543     }
    544     else if (whence != 0 && pos != 0) {
    545         PyErr_SetString(PyExc_IOError,
    546                         "Can't do nonzero cur-relative seeks");
    547         return NULL;
    548     }
    549 
    550     /* whence = 0: offset relative to beginning of the string.
    551        whence = 1: no change to current position.
    552        whence = 2: change position to end of file. */
    553     if (whence == 1) {
    554         pos = self->pos;
    555     }
    556     else if (whence == 2) {
    557         pos = self->string_size;
    558     }
    559 
    560     self->pos = pos;
    561 
    562     return PyLong_FromSsize_t(self->pos);
    563 }
    564 
    565 /*[clinic input]
    566 _io.StringIO.write
    567     s as obj: object
    568     /
    569 
    570 Write string to file.
    571 
    572 Returns the number of characters written, which is always equal to
    573 the length of the string.
    574 [clinic start generated code]*/
    575 
    576 static PyObject *
    577 _io_StringIO_write(stringio *self, PyObject *obj)
    578 /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
    579 {
    580     Py_ssize_t size;
    581 
    582     CHECK_INITIALIZED(self);
    583     if (!PyUnicode_Check(obj)) {
    584         PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
    585                      Py_TYPE(obj)->tp_name);
    586         return NULL;
    587     }
    588     if (PyUnicode_READY(obj))
    589         return NULL;
    590     CHECK_CLOSED(self);
    591     size = PyUnicode_GET_LENGTH(obj);
    592 
    593     if (size > 0 && write_str(self, obj) < 0)
    594         return NULL;
    595 
    596     return PyLong_FromSsize_t(size);
    597 }
    598 
    599 /*[clinic input]
    600 _io.StringIO.close
    601 
    602 Close the IO object.
    603 
    604 Attempting any further operation after the object is closed
    605 will raise a ValueError.
    606 
    607 This method has no effect if the file is already closed.
    608 [clinic start generated code]*/
    609 
    610 static PyObject *
    611 _io_StringIO_close_impl(stringio *self)
    612 /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
    613 {
    614     self->closed = 1;
    615     /* Free up some memory */
    616     if (resize_buffer(self, 0) < 0)
    617         return NULL;
    618     _PyAccu_Destroy(&self->accu);
    619     Py_CLEAR(self->readnl);
    620     Py_CLEAR(self->writenl);
    621     Py_CLEAR(self->decoder);
    622     Py_RETURN_NONE;
    623 }
    624 
/* GC traversal function: only self->dict is reported to the collector. */
static int
stringio_traverse(stringio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dict);
    return 0;
}
    631 
/* GC clear function: drops the reference held in self->dict, the only
   member visited by stringio_traverse(). */
static int
stringio_clear(stringio *self)
{
    Py_CLEAR(self->dict);
    return 0;
}
    638 
    639 static void
    640 stringio_dealloc(stringio *self)
    641 {
    642     _PyObject_GC_UNTRACK(self);
    643     self->ok = 0;
    644     if (self->buf) {
    645         PyMem_Free(self->buf);
    646         self->buf = NULL;
    647     }
    648     _PyAccu_Destroy(&self->accu);
    649     Py_CLEAR(self->readnl);
    650     Py_CLEAR(self->writenl);
    651     Py_CLEAR(self->decoder);
    652     Py_CLEAR(self->dict);
    653     if (self->weakreflist != NULL)
    654         PyObject_ClearWeakRefs((PyObject *) self);
    655     Py_TYPE(self)->tp_free(self);
    656 }
    657 
    658 static PyObject *
    659 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    660 {
    661     stringio *self;
    662 
    663     assert(type != NULL && type->tp_alloc != NULL);
    664     self = (stringio *)type->tp_alloc(type, 0);
    665     if (self == NULL)
    666         return NULL;
    667 
    668     /* tp_alloc initializes all the fields to zero. So we don't have to
    669        initialize them here. */
    670 
    671     self->buf = (Py_UCS4 *)PyMem_Malloc(0);
    672     if (self->buf == NULL) {
    673         Py_DECREF(self);
    674         return PyErr_NoMemory();
    675     }
    676 
    677     return (PyObject *)self;
    678 }
    679 
    680 /*[clinic input]
    681 _io.StringIO.__init__
    682     initial_value as value: object(c_default="NULL") = ''
    683     newline as newline_obj: object(c_default="NULL") = '\n'
    684 
    685 Text I/O implementation using an in-memory buffer.
    686 
    687 The initial_value argument sets the value of object.  The newline
    688 argument is like the one of TextIOWrapper's constructor.
    689 [clinic start generated code]*/
    690 
    691 static int
    692 _io_StringIO___init___impl(stringio *self, PyObject *value,
    693                            PyObject *newline_obj)
    694 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
    695 {
    696     char *newline = "\n";
    697     Py_ssize_t value_len;
    698 
    699     /* Parse the newline argument. We only want to allow unicode objects or
    700        None. */
    701     if (newline_obj == Py_None) {
    702         newline = NULL;
    703     }
    704     else if (newline_obj) {
    705         if (!PyUnicode_Check(newline_obj)) {
    706             PyErr_Format(PyExc_TypeError,
    707                          "newline must be str or None, not %.200s",
    708                          Py_TYPE(newline_obj)->tp_name);
    709             return -1;
    710         }
    711         newline = PyUnicode_AsUTF8(newline_obj);
    712         if (newline == NULL)
    713             return -1;
    714     }
    715 
    716     if (newline && newline[0] != '\0'
    717         && !(newline[0] == '\n' && newline[1] == '\0')
    718         && !(newline[0] == '\r' && newline[1] == '\0')
    719         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
    720         PyErr_Format(PyExc_ValueError,
    721                      "illegal newline value: %R", newline_obj);
    722         return -1;
    723     }
    724     if (value && value != Py_None && !PyUnicode_Check(value)) {
    725         PyErr_Format(PyExc_TypeError,
    726                      "initial_value must be str or None, not %.200s",
    727                      Py_TYPE(value)->tp_name);
    728         return -1;
    729     }
    730 
    731     self->ok = 0;
    732 
    733     _PyAccu_Destroy(&self->accu);
    734     Py_CLEAR(self->readnl);
    735     Py_CLEAR(self->writenl);
    736     Py_CLEAR(self->decoder);
    737 
    738     assert((newline != NULL && newline_obj != Py_None) ||
    739            (newline == NULL && newline_obj == Py_None));
    740 
    741     if (newline) {
    742         self->readnl = PyUnicode_FromString(newline);
    743         if (self->readnl == NULL)
    744             return -1;
    745     }
    746     self->readuniversal = (newline == NULL || newline[0] == '\0');
    747     self->readtranslate = (newline == NULL);
    748     /* If newline == "", we don't translate anything.
    749        If newline == "\n" or newline == None, we translate to "\n", which is
    750        a no-op.
    751        (for newline == None, TextIOWrapper translates to os.linesep, but it
    752        is pointless for StringIO)
    753     */
    754     if (newline != NULL && newline[0] == '\r') {
    755         self->writenl = self->readnl;
    756         Py_INCREF(self->writenl);
    757     }
    758 
    759     if (self->readuniversal) {
    760         self->decoder = PyObject_CallFunction(
    761             (PyObject *)&PyIncrementalNewlineDecoder_Type,
    762             "Oi", Py_None, (int) self->readtranslate);
    763         if (self->decoder == NULL)
    764             return -1;
    765     }
    766 
    767     /* Now everything is set up, resize buffer to size of initial value,
    768        and copy it */
    769     self->string_size = 0;
    770     if (value && value != Py_None)
    771         value_len = PyUnicode_GetLength(value);
    772     else
    773         value_len = 0;
    774     if (value_len > 0) {
    775         /* This is a heuristic, for newline translation might change
    776            the string length. */
    777         if (resize_buffer(self, 0) < 0)
    778             return -1;
    779         self->state = STATE_REALIZED;
    780         self->pos = 0;
    781         if (write_str(self, value) < 0)
    782             return -1;
    783     }
    784     else {
    785         /* Empty stringio object, we can start by accumulating */
    786         if (resize_buffer(self, 0) < 0)
    787             return -1;
    788         if (_PyAccu_Init(&self->accu))
    789             return -1;
    790         self->state = STATE_ACCUMULATING;
    791     }
    792     self->pos = 0;
    793 
    794     self->closed = 0;
    795     self->ok = 1;
    796     return 0;
    797 }
    798 
    799 /* Properties and pseudo-properties */
    800 
    801 /*[clinic input]
    802 _io.StringIO.readable
    803 
    804 Returns True if the IO object can be read.
    805 [clinic start generated code]*/
    806 
static PyObject *
_io_StringIO_readable_impl(stringio *self)
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
{
    /* Always True for a usable object; the guard macros raise ValueError
       when the object is uninitialized or closed. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
    815 
    816 /*[clinic input]
    817 _io.StringIO.writable
    818 
    819 Returns True if the IO object can be written.
    820 [clinic start generated code]*/
    821 
static PyObject *
_io_StringIO_writable_impl(stringio *self)
/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
{
    /* Always True for a usable object; the guard macros raise ValueError
       when the object is uninitialized or closed. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
    830 
    831 /*[clinic input]
    832 _io.StringIO.seekable
    833 
    834 Returns True if the IO object can be seeked.
    835 [clinic start generated code]*/
    836 
static PyObject *
_io_StringIO_seekable_impl(stringio *self)
/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
{
    /* Always True for a usable object; the guard macros raise ValueError
       when the object is uninitialized or closed. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
    845 
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__ and unlike
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
   to avoid decoding the complex newline state while keeping the object
   representation compact.

   See the comment in bytesio.c regarding why only pickle protocols 2 and
   onward are supported.
*/
    857 
    858 static PyObject *
    859 stringio_getstate(stringio *self)
    860 {
    861     PyObject *initvalue = _io_StringIO_getvalue_impl(self);
    862     PyObject *dict;
    863     PyObject *state;
    864 
    865     if (initvalue == NULL)
    866         return NULL;
    867     if (self->dict == NULL) {
    868         Py_INCREF(Py_None);
    869         dict = Py_None;
    870     }
    871     else {
    872         dict = PyDict_Copy(self->dict);
    873         if (dict == NULL)
    874             return NULL;
    875     }
    876 
    877     state = Py_BuildValue("(OOnN)", initvalue,
    878                           self->readnl ? self->readnl : Py_None,
    879                           self->pos, dict);
    880     Py_DECREF(initvalue);
    881     return state;
    882 }
    883 
    884 static PyObject *
    885 stringio_setstate(stringio *self, PyObject *state)
    886 {
    887     PyObject *initarg;
    888     PyObject *position_obj;
    889     PyObject *dict;
    890     Py_ssize_t pos;
    891 
    892     assert(state != NULL);
    893     CHECK_CLOSED(self);
    894 
    895     /* We allow the state tuple to be longer than 4, because we may need
    896        someday to extend the object's state without breaking
    897        backward-compatibility. */
    898     if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
    899         PyErr_Format(PyExc_TypeError,
    900                      "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
    901                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
    902         return NULL;
    903     }
    904 
    905     /* Initialize the object's state. */
    906     initarg = PyTuple_GetSlice(state, 0, 2);
    907     if (initarg == NULL)
    908         return NULL;
    909     if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
    910         Py_DECREF(initarg);
    911         return NULL;
    912     }
    913     Py_DECREF(initarg);
    914 
    915     /* Restore the buffer state. Even if __init__ did initialize the buffer,
    916        we have to initialize it again since __init__ may translate the
    917        newlines in the initial_value string. We clearly do not want that
    918        because the string value in the state tuple has already been translated
    919        once by __init__. So we do not take any chance and replace object's
    920        buffer completely. */
    921     {
    922         PyObject *item;
    923         Py_UCS4 *buf;
    924         Py_ssize_t bufsize;
    925 
    926         item = PyTuple_GET_ITEM(state, 0);
    927         buf = PyUnicode_AsUCS4Copy(item);
    928         if (buf == NULL)
    929             return NULL;
    930         bufsize = PyUnicode_GET_LENGTH(item);
    931 
    932         if (resize_buffer(self, bufsize) < 0) {
    933             PyMem_Free(buf);
    934             return NULL;
    935         }
    936         memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
    937         PyMem_Free(buf);
    938         self->string_size = bufsize;
    939     }
    940 
    941     /* Set carefully the position value. Alternatively, we could use the seek
    942        method instead of modifying self->pos directly to better protect the
    943        object internal state against errneous (or malicious) inputs. */
    944     position_obj = PyTuple_GET_ITEM(state, 2);
    945     if (!PyLong_Check(position_obj)) {
    946         PyErr_Format(PyExc_TypeError,
    947                      "third item of state must be an integer, got %.200s",
    948                      Py_TYPE(position_obj)->tp_name);
    949         return NULL;
    950     }
    951     pos = PyLong_AsSsize_t(position_obj);
    952     if (pos == -1 && PyErr_Occurred())
    953         return NULL;
    954     if (pos < 0) {
    955         PyErr_SetString(PyExc_ValueError,
    956                         "position value cannot be negative");
    957         return NULL;
    958     }
    959     self->pos = pos;
    960 
    961     /* Set the dictionary of the instance variables. */
    962     dict = PyTuple_GET_ITEM(state, 3);
    963     if (dict != Py_None) {
    964         if (!PyDict_Check(dict)) {
    965             PyErr_Format(PyExc_TypeError,
    966                          "fourth item of state should be a dict, got a %.200s",
    967                          Py_TYPE(dict)->tp_name);
    968             return NULL;
    969         }
    970         if (self->dict) {
    971             /* Alternatively, we could replace the internal dictionary
    972                completely. However, it seems more practical to just update it. */
    973             if (PyDict_Update(self->dict, dict) < 0)
    974                 return NULL;
    975         }
    976         else {
    977             Py_INCREF(dict);
    978             self->dict = dict;
    979         }
    980     }
    981 
    982     Py_RETURN_NONE;
    983 }
    984 
    985 
    986 static PyObject *
    987 stringio_closed(stringio *self, void *context)
    988 {
    989     CHECK_INITIALIZED(self);
    990     return PyBool_FromLong(self->closed);
    991 }
    992 
    993 static PyObject *
    994 stringio_line_buffering(stringio *self, void *context)
    995 {
    996     CHECK_INITIALIZED(self);
    997     CHECK_CLOSED(self);
    998     Py_RETURN_FALSE;
    999 }
   1000 
   1001 static PyObject *
   1002 stringio_newlines(stringio *self, void *context)
   1003 {
   1004     CHECK_INITIALIZED(self);
   1005     CHECK_CLOSED(self);
   1006     if (self->decoder == NULL)
   1007         Py_RETURN_NONE;
   1008     return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
   1009 }
   1010 
   1011 #include "clinic/stringio.c.h"
   1012 
   1013 static struct PyMethodDef stringio_methods[] = {
   1014     _IO_STRINGIO_CLOSE_METHODDEF
   1015     _IO_STRINGIO_GETVALUE_METHODDEF
   1016     _IO_STRINGIO_READ_METHODDEF
   1017     _IO_STRINGIO_READLINE_METHODDEF
   1018     _IO_STRINGIO_TELL_METHODDEF
   1019     _IO_STRINGIO_TRUNCATE_METHODDEF
   1020     _IO_STRINGIO_SEEK_METHODDEF
   1021     _IO_STRINGIO_WRITE_METHODDEF
   1022 
   1023     _IO_STRINGIO_SEEKABLE_METHODDEF
   1024     _IO_STRINGIO_READABLE_METHODDEF
   1025     _IO_STRINGIO_WRITABLE_METHODDEF
   1026 
   1027     {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
   1028     {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
   1029     {NULL, NULL}        /* sentinel */
   1030 };
   1031 
   1032 static PyGetSetDef stringio_getset[] = {
   1033     {"closed",         (getter)stringio_closed,         NULL, NULL},
   1034     {"newlines",       (getter)stringio_newlines,       NULL, NULL},
   1035     /*  (following comments straight off of the original Python wrapper:)
   1036         XXX Cruft to support the TextIOWrapper API. This would only
   1037         be meaningful if StringIO supported the buffer attribute.
   1038         Hopefully, a better solution, than adding these pseudo-attributes,
   1039         will be found.
   1040     */
   1041     {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
   1042     {NULL}
   1043 };
   1044 
   1045 PyTypeObject PyStringIO_Type = {
   1046     PyVarObject_HEAD_INIT(NULL, 0)
   1047     "_io.StringIO",                            /*tp_name*/
   1048     sizeof(stringio),                    /*tp_basicsize*/
   1049     0,                                         /*tp_itemsize*/
   1050     (destructor)stringio_dealloc,              /*tp_dealloc*/
   1051     0,                                         /*tp_print*/
   1052     0,                                         /*tp_getattr*/
   1053     0,                                         /*tp_setattr*/
   1054     0,                                         /*tp_reserved*/
   1055     0,                                         /*tp_repr*/
   1056     0,                                         /*tp_as_number*/
   1057     0,                                         /*tp_as_sequence*/
   1058     0,                                         /*tp_as_mapping*/
   1059     0,                                         /*tp_hash*/
   1060     0,                                         /*tp_call*/
   1061     0,                                         /*tp_str*/
   1062     0,                                         /*tp_getattro*/
   1063     0,                                         /*tp_setattro*/
   1064     0,                                         /*tp_as_buffer*/
   1065     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
   1066                        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
   1067     _io_StringIO___init____doc__,              /*tp_doc*/
   1068     (traverseproc)stringio_traverse,           /*tp_traverse*/
   1069     (inquiry)stringio_clear,                   /*tp_clear*/
   1070     0,                                         /*tp_richcompare*/
   1071     offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
   1072     0,                                         /*tp_iter*/
   1073     (iternextfunc)stringio_iternext,           /*tp_iternext*/
   1074     stringio_methods,                          /*tp_methods*/
   1075     0,                                         /*tp_members*/
   1076     stringio_getset,                           /*tp_getset*/
   1077     0,                                         /*tp_base*/
   1078     0,                                         /*tp_dict*/
   1079     0,                                         /*tp_descr_get*/
   1080     0,                                         /*tp_descr_set*/
   1081     offsetof(stringio, dict),                  /*tp_dictoffset*/
   1082     _io_StringIO___init__,                     /*tp_init*/
   1083     0,                                         /*tp_alloc*/
   1084     stringio_new,                              /*tp_new*/
   1085 };
   1086