1 /* 2 An implementation of Text I/O as defined by PEP 3116 - "New I/O" 3 4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. 5 6 Written by Amaury Forgeot d'Arc and Antoine Pitrou 7 */ 8 9 #define PY_SSIZE_T_CLEAN 10 #include "Python.h" 11 #include "structmember.h" 12 #include "_iomodule.h" 13 14 /*[clinic input] 15 module _io 16 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type" 17 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe" 18 [clinic start generated code]*/ 19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/ 20 21 /*[python input] 22 class io_ssize_t_converter(CConverter): 23 type = 'Py_ssize_t' 24 converter = '_PyIO_ConvertSsize_t' 25 [python start generated code]*/ 26 /*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/ 27 28 _Py_IDENTIFIER(close); 29 _Py_IDENTIFIER(_dealloc_warn); 30 _Py_IDENTIFIER(decode); 31 _Py_IDENTIFIER(fileno); 32 _Py_IDENTIFIER(flush); 33 _Py_IDENTIFIER(getpreferredencoding); 34 _Py_IDENTIFIER(isatty); 35 _Py_IDENTIFIER(mode); 36 _Py_IDENTIFIER(name); 37 _Py_IDENTIFIER(raw); 38 _Py_IDENTIFIER(read); 39 _Py_IDENTIFIER(read1); 40 _Py_IDENTIFIER(readable); 41 _Py_IDENTIFIER(replace); 42 _Py_IDENTIFIER(reset); 43 _Py_IDENTIFIER(seek); 44 _Py_IDENTIFIER(seekable); 45 _Py_IDENTIFIER(setstate); 46 _Py_IDENTIFIER(tell); 47 _Py_IDENTIFIER(writable); 48 49 /* TextIOBase */ 50 51 PyDoc_STRVAR(textiobase_doc, 52 "Base class for text I/O.\n" 53 "\n" 54 "This class provides a character and line based interface to stream\n" 55 "I/O. There is no readinto method because Python's character strings\n" 56 "are immutable. There is no public constructor.\n" 57 ); 58 59 static PyObject * 60 _unsupported(const char *message) 61 { 62 _PyIO_State *state = IO_STATE(); 63 if (state != NULL) 64 PyErr_SetString(state->unsupported_operation, message); 65 return NULL; 66 } 67 68 PyDoc_STRVAR(textiobase_detach_doc, 69 "Separate the underlying buffer from the TextIOBase and return it.\n" 70 "\n" 71 "After the underlying buffer has been detached, the TextIO is in an\n" 72 "unusable state.\n" 73 ); 74 75 static PyObject * 76 textiobase_detach(PyObject *self) 77 { 78 return _unsupported("detach"); 79 } 80 81 PyDoc_STRVAR(textiobase_read_doc, 82 "Read at most n characters from stream.\n" 83 "\n" 84 "Read from underlying buffer until we have n characters or we hit EOF.\n" 85 "If n is negative or omitted, read until EOF.\n" 86 ); 87 88 static PyObject * 89 textiobase_read(PyObject *self, PyObject *args) 90 { 91 return _unsupported("read"); 92 } 93 94 PyDoc_STRVAR(textiobase_readline_doc, 95 "Read until newline or EOF.\n" 96 "\n" 97 "Returns an empty string if EOF is hit immediately.\n" 98 ); 99 100 static PyObject * 101 textiobase_readline(PyObject *self, PyObject *args) 102 { 103 return _unsupported("readline"); 104 } 105 106 PyDoc_STRVAR(textiobase_write_doc, 107 "Write string to stream.\n" 108 "Returns the number of characters written (which is always equal to\n" 109 "the length of the string).\n" 110 ); 111 112 static PyObject * 113 textiobase_write(PyObject *self, PyObject *args) 114 { 115 return _unsupported("write"); 116 } 117 118 PyDoc_STRVAR(textiobase_encoding_doc, 119 "Encoding of the text stream.\n" 120 "\n" 121 "Subclasses should override.\n" 122 ); 123 124 static PyObject * 125 textiobase_encoding_get(PyObject *self, void *context) 126 { 127 Py_RETURN_NONE; 128 } 129 130 PyDoc_STRVAR(textiobase_newlines_doc, 131 "Line endings translated so far.\n" 132 "\n" 133 "Only line endings translated during reading are considered.\n" 134 "\n" 135 "Subclasses should override.\n" 136 ); 137 138 static PyObject * 139 textiobase_newlines_get(PyObject *self, void *context) 140 { 141 Py_RETURN_NONE; 142 } 143 144 PyDoc_STRVAR(textiobase_errors_doc, 145 "The error setting of the decoder or encoder.\n" 146 "\n" 147 "Subclasses should override.\n" 148 ); 149 150 static PyObject * 151 textiobase_errors_get(PyObject *self, void *context) 152 { 153 Py_RETURN_NONE; 154 } 155 156 157 static PyMethodDef textiobase_methods[] = { 158 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc}, 159 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc}, 160 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc}, 161 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc}, 162 {NULL, NULL} 163 }; 164 165 static PyGetSetDef textiobase_getset[] = { 166 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc}, 167 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc}, 168 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc}, 169 {NULL} 170 }; 171 172 PyTypeObject PyTextIOBase_Type = { 173 PyVarObject_HEAD_INIT(NULL, 0) 174 "_io._TextIOBase", /*tp_name*/ 175 0, /*tp_basicsize*/ 176 0, /*tp_itemsize*/ 177 0, /*tp_dealloc*/ 178 0, /*tp_print*/ 179 0, /*tp_getattr*/ 180 0, /*tp_setattr*/ 181 0, /*tp_compare */ 182 0, /*tp_repr*/ 183 0, /*tp_as_number*/ 184 0, /*tp_as_sequence*/ 185 0, /*tp_as_mapping*/ 186 0, /*tp_hash */ 187 0, /*tp_call*/ 188 0, /*tp_str*/ 189 0, /*tp_getattro*/ 190 0, /*tp_setattro*/ 191 0, /*tp_as_buffer*/ 192 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE 193 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ 194 textiobase_doc, /* tp_doc */ 195 0, /* tp_traverse */ 196 0, /* tp_clear */ 197 0, /* tp_richcompare */ 198 0, /* tp_weaklistoffset */ 199 0, /* tp_iter */ 200 0, /* tp_iternext */ 201 textiobase_methods, /* tp_methods */ 202 0, /* tp_members */ 203 textiobase_getset, /* tp_getset */ 204 &PyIOBase_Type, /* tp_base */ 205 0, /* tp_dict */ 206 0, /* tp_descr_get */ 207 0, /* tp_descr_set */ 208 0, /* tp_dictoffset */ 209 0, /* tp_init */ 210 0, /* tp_alloc */ 211 0, /* tp_new */ 212 0, /* tp_free */ 213 0, /* tp_is_gc */ 214 0, /* tp_bases */ 215 0, /* tp_mro */ 216 0, /* tp_cache */ 217 0, /* tp_subclasses */ 218 0, /* tp_weaklist */ 219 0, /* tp_del */ 220 0, /* tp_version_tag */ 221 0, /* tp_finalize */ 222 }; 223 224 225 /* IncrementalNewlineDecoder */ 226 227 typedef struct { 228 PyObject_HEAD 229 PyObject *decoder; 230 PyObject *errors; 231 unsigned int pendingcr: 1; 232 unsigned int translate: 1; 233 unsigned int seennl: 3; 234 } nldecoder_object; 235 236 /*[clinic input] 237 _io.IncrementalNewlineDecoder.__init__ 238 decoder: object 239 translate: int 240 errors: object(c_default="NULL") = "strict" 241 242 Codec used when reading a file in universal newlines mode. 243 244 It wraps another incremental decoder, translating \r\n and \r into \n. 245 It also records the types of newlines encountered. When used with 246 translate=False, it ensures that the newline sequence is returned in 247 one piece. When used with decoder=None, it expects unicode strings as 248 decode input and translates newlines without first invoking an external 249 decoder. 250 [clinic start generated code]*/ 251 252 static int 253 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, 254 PyObject *decoder, int translate, 255 PyObject *errors) 256 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/ 257 { 258 self->decoder = decoder; 259 Py_INCREF(decoder); 260 261 if (errors == NULL) { 262 self->errors = PyUnicode_FromString("strict"); 263 if (self->errors == NULL) 264 return -1; 265 } 266 else { 267 Py_INCREF(errors); 268 self->errors = errors; 269 } 270 271 self->translate = translate; 272 self->seennl = 0; 273 self->pendingcr = 0; 274 275 return 0; 276 } 277 278 static void 279 incrementalnewlinedecoder_dealloc(nldecoder_object *self) 280 { 281 Py_CLEAR(self->decoder); 282 Py_CLEAR(self->errors); 283 Py_TYPE(self)->tp_free((PyObject *)self); 284 } 285 286 static int 287 check_decoded(PyObject *decoded) 288 { 289 if (decoded == NULL) 290 return -1; 291 if (!PyUnicode_Check(decoded)) { 292 PyErr_Format(PyExc_TypeError, 293 "decoder should return a string result, not '%.200s'", 294 Py_TYPE(decoded)->tp_name); 295 Py_DECREF(decoded); 296 return -1; 297 } 298 if (PyUnicode_READY(decoded) < 0) { 299 Py_DECREF(decoded); 300 return -1; 301 } 302 return 0; 303 } 304 305 #define SEEN_CR 1 306 #define SEEN_LF 2 307 #define SEEN_CRLF 4 308 #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF) 309 310 PyObject * 311 _PyIncrementalNewlineDecoder_decode(PyObject *myself, 312 PyObject *input, int final) 313 { 314 PyObject *output; 315 Py_ssize_t output_len; 316 nldecoder_object *self = (nldecoder_object *) myself; 317 318 if (self->decoder == NULL) { 319 PyErr_SetString(PyExc_ValueError, 320 "IncrementalNewlineDecoder.__init__ not called"); 321 return NULL; 322 } 323 324 /* decode input (with the eventual \r from a previous pass) */ 325 if (self->decoder != Py_None) { 326 output = PyObject_CallMethodObjArgs(self->decoder, 327 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL); 328 } 329 else { 330 output = input; 331 Py_INCREF(output); 332 } 333 334 if (check_decoded(output) < 0) 335 return NULL; 336 337 output_len = PyUnicode_GET_LENGTH(output); 338 if (self->pendingcr && (final || output_len > 0)) { 339 /* Prefix output with CR */ 340 int kind; 341 PyObject *modified; 342 char *out; 343 344 modified = PyUnicode_New(output_len + 1, 345 PyUnicode_MAX_CHAR_VALUE(output)); 346 if (modified == NULL) 347 goto error; 348 kind = PyUnicode_KIND(modified); 349 out = PyUnicode_DATA(modified); 350 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); 351 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); 352 Py_DECREF(output); 353 output = modified; /* output remains ready */ 354 self->pendingcr = 0; 355 output_len++; 356 } 357 358 /* retain last \r even when not translating data: 359 * then readline() is sure to get \r\n in one pass 360 */ 361 if (!final) { 362 if (output_len > 0 363 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r') 364 { 365 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1); 366 if (modified == NULL) 367 goto error; 368 Py_DECREF(output); 369 output = modified; 370 self->pendingcr = 1; 371 } 372 } 373 374 /* Record which newlines are read and do newline translation if desired, 375 all in one pass. */ 376 { 377 void *in_str; 378 Py_ssize_t len; 379 int seennl = self->seennl; 380 int only_lf = 0; 381 int kind; 382 383 in_str = PyUnicode_DATA(output); 384 len = PyUnicode_GET_LENGTH(output); 385 kind = PyUnicode_KIND(output); 386 387 if (len == 0) 388 return output; 389 390 /* If, up to now, newlines are consistently \n, do a quick check 391 for the \r *byte* with the libc's optimized memchr. 392 */ 393 if (seennl == SEEN_LF || seennl == 0) { 394 only_lf = (memchr(in_str, '\r', kind * len) == NULL); 395 } 396 397 if (only_lf) { 398 /* If not already seen, quick scan for a possible "\n" character. 399 (there's nothing else to be done, even when in translation mode) 400 */ 401 if (seennl == 0 && 402 memchr(in_str, '\n', kind * len) != NULL) { 403 if (kind == PyUnicode_1BYTE_KIND) 404 seennl |= SEEN_LF; 405 else { 406 Py_ssize_t i = 0; 407 for (;;) { 408 Py_UCS4 c; 409 /* Fast loop for non-control characters */ 410 while (PyUnicode_READ(kind, in_str, i) > '\n') 411 i++; 412 c = PyUnicode_READ(kind, in_str, i++); 413 if (c == '\n') { 414 seennl |= SEEN_LF; 415 break; 416 } 417 if (i >= len) 418 break; 419 } 420 } 421 } 422 /* Finished: we have scanned for newlines, and none of them 423 need translating */ 424 } 425 else if (!self->translate) { 426 Py_ssize_t i = 0; 427 /* We have already seen all newline types, no need to scan again */ 428 if (seennl == SEEN_ALL) 429 goto endscan; 430 for (;;) { 431 Py_UCS4 c; 432 /* Fast loop for non-control characters */ 433 while (PyUnicode_READ(kind, in_str, i) > '\r') 434 i++; 435 c = PyUnicode_READ(kind, in_str, i++); 436 if (c == '\n') 437 seennl |= SEEN_LF; 438 else if (c == '\r') { 439 if (PyUnicode_READ(kind, in_str, i) == '\n') { 440 seennl |= SEEN_CRLF; 441 i++; 442 } 443 else 444 seennl |= SEEN_CR; 445 } 446 if (i >= len) 447 break; 448 if (seennl == SEEN_ALL) 449 break; 450 } 451 endscan: 452 ; 453 } 454 else { 455 void *translated; 456 int kind = PyUnicode_KIND(output); 457 void *in_str = PyUnicode_DATA(output); 458 Py_ssize_t in, out; 459 /* XXX: Previous in-place translation here is disabled as 460 resizing is not possible anymore */ 461 /* We could try to optimize this so that we only do a copy 462 when there is something to translate. On the other hand, 463 we already know there is a \r byte, so chances are high 464 that something needs to be done. */ 465 translated = PyMem_Malloc(kind * len); 466 if (translated == NULL) { 467 PyErr_NoMemory(); 468 goto error; 469 } 470 in = out = 0; 471 for (;;) { 472 Py_UCS4 c; 473 /* Fast loop for non-control characters */ 474 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r') 475 PyUnicode_WRITE(kind, translated, out++, c); 476 if (c == '\n') { 477 PyUnicode_WRITE(kind, translated, out++, c); 478 seennl |= SEEN_LF; 479 continue; 480 } 481 if (c == '\r') { 482 if (PyUnicode_READ(kind, in_str, in) == '\n') { 483 in++; 484 seennl |= SEEN_CRLF; 485 } 486 else 487 seennl |= SEEN_CR; 488 PyUnicode_WRITE(kind, translated, out++, '\n'); 489 continue; 490 } 491 if (in > len) 492 break; 493 PyUnicode_WRITE(kind, translated, out++, c); 494 } 495 Py_DECREF(output); 496 output = PyUnicode_FromKindAndData(kind, translated, out); 497 PyMem_Free(translated); 498 if (!output) 499 return NULL; 500 } 501 self->seennl |= seennl; 502 } 503 504 return output; 505 506 error: 507 Py_DECREF(output); 508 return NULL; 509 } 510 511 /*[clinic input] 512 _io.IncrementalNewlineDecoder.decode 513 input: object 514 final: int(c_default="0") = False 515 [clinic start generated code]*/ 516 517 static PyObject * 518 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self, 519 PyObject *input, int final) 520 /*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/ 521 { 522 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); 523 } 524 525 /*[clinic input] 526 _io.IncrementalNewlineDecoder.getstate 527 [clinic start generated code]*/ 528 529 static PyObject * 530 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) 531 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/ 532 { 533 PyObject *buffer; 534 unsigned long long flag; 535 536 if (self->decoder != Py_None) { 537 PyObject *state = PyObject_CallMethodObjArgs(self->decoder, 538 _PyIO_str_getstate, NULL); 539 if (state == NULL) 540 return NULL; 541 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) { 542 Py_DECREF(state); 543 return NULL; 544 } 545 Py_INCREF(buffer); 546 Py_DECREF(state); 547 } 548 else { 549 buffer = PyBytes_FromString(""); 550 flag = 0; 551 } 552 flag <<= 1; 553 if (self->pendingcr) 554 flag |= 1; 555 return Py_BuildValue("NK", buffer, flag); 556 } 557 558 /*[clinic input] 559 _io.IncrementalNewlineDecoder.setstate 560 state: object 561 / 562 [clinic start generated code]*/ 563 564 static PyObject * 565 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self, 566 PyObject *state) 567 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/ 568 { 569 PyObject *buffer; 570 unsigned long long flag; 571 572 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) 573 return NULL; 574 575 self->pendingcr = (int) (flag & 1); 576 flag >>= 1; 577 578 if (self->decoder != Py_None) 579 return _PyObject_CallMethodId(self->decoder, 580 &PyId_setstate, "((OK))", buffer, flag); 581 else 582 Py_RETURN_NONE; 583 } 584 585 /*[clinic input] 586 _io.IncrementalNewlineDecoder.reset 587 [clinic start generated code]*/ 588 589 static PyObject * 590 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) 591 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ 592 { 593 self->seennl = 0; 594 self->pendingcr = 0; 595 if (self->decoder != Py_None) 596 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); 597 else 598 Py_RETURN_NONE; 599 } 600 601 static PyObject * 602 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) 603 { 604 switch (self->seennl) { 605 case SEEN_CR: 606 return PyUnicode_FromString("\r"); 607 case SEEN_LF: 608 return PyUnicode_FromString("\n"); 609 case SEEN_CRLF: 610 return PyUnicode_FromString("\r\n"); 611 case SEEN_CR | SEEN_LF: 612 return Py_BuildValue("ss", "\r", "\n"); 613 case SEEN_CR | SEEN_CRLF: 614 return Py_BuildValue("ss", "\r", "\r\n"); 615 case SEEN_LF | SEEN_CRLF: 616 return Py_BuildValue("ss", "\n", "\r\n"); 617 case SEEN_CR | SEEN_LF | SEEN_CRLF: 618 return Py_BuildValue("sss", "\r", "\n", "\r\n"); 619 default: 620 Py_RETURN_NONE; 621 } 622 623 } 624 625 /* TextIOWrapper */ 626 627 typedef PyObject * 628 (*encodefunc_t)(PyObject *, PyObject *); 629 630 typedef struct 631 { 632 PyObject_HEAD 633 int ok; /* initialized? */ 634 int detached; 635 Py_ssize_t chunk_size; 636 PyObject *buffer; 637 PyObject *encoding; 638 PyObject *encoder; 639 PyObject *decoder; 640 PyObject *readnl; 641 PyObject *errors; 642 const char *writenl; /* utf-8 encoded, NULL stands for \n */ 643 char line_buffering; 644 char write_through; 645 char readuniversal; 646 char readtranslate; 647 char writetranslate; 648 char seekable; 649 char has_read1; 650 char telling; 651 char finalizing; 652 /* Specialized encoding func (see below) */ 653 encodefunc_t encodefunc; 654 /* Whether or not it's the start of the stream */ 655 char encoding_start_of_stream; 656 657 /* Reads and writes are internally buffered in order to speed things up. 658 However, any read will first flush the write buffer if itsn't empty. 659 660 Please also note that text to be written is first encoded before being 661 buffered. This is necessary so that encoding errors are immediately 662 reported to the caller, but it unfortunately means that the 663 IncrementalEncoder (whose encode() method is always written in Python) 664 becomes a bottleneck for small writes. 665 */ 666 PyObject *decoded_chars; /* buffer for text returned from decoder */ 667 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */ 668 PyObject *pending_bytes; /* list of bytes objects waiting to be 669 written, or NULL */ 670 Py_ssize_t pending_bytes_count; 671 672 /* snapshot is either None, or a tuple (dec_flags, next_input) where 673 * dec_flags is the second (integer) item of the decoder state and 674 * next_input is the chunk of input bytes that comes next after the 675 * snapshot point. We use this to reconstruct decoder states in tell(). 676 */ 677 PyObject *snapshot; 678 /* Bytes-to-characters ratio for the current chunk. Serves as input for 679 the heuristic in tell(). */ 680 double b2cratio; 681 682 /* Cache raw object if it's a FileIO object */ 683 PyObject *raw; 684 685 PyObject *weakreflist; 686 PyObject *dict; 687 } textio; 688 689 /* A couple of specialized cases in order to bypass the slow incremental 690 encoding methods for the most popular encodings. */ 691 692 static PyObject * 693 ascii_encode(textio *self, PyObject *text) 694 { 695 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors)); 696 } 697 698 static PyObject * 699 utf16be_encode(textio *self, PyObject *text) 700 { 701 return _PyUnicode_EncodeUTF16(text, 702 PyBytes_AS_STRING(self->errors), 1); 703 } 704 705 static PyObject * 706 utf16le_encode(textio *self, PyObject *text) 707 { 708 return _PyUnicode_EncodeUTF16(text, 709 PyBytes_AS_STRING(self->errors), -1); 710 } 711 712 static PyObject * 713 utf16_encode(textio *self, PyObject *text) 714 { 715 if (!self->encoding_start_of_stream) { 716 /* Skip the BOM and use native byte ordering */ 717 #if PY_BIG_ENDIAN 718 return utf16be_encode(self, text); 719 #else 720 return utf16le_encode(self, text); 721 #endif 722 } 723 return _PyUnicode_EncodeUTF16(text, 724 PyBytes_AS_STRING(self->errors), 0); 725 } 726 727 static PyObject * 728 utf32be_encode(textio *self, PyObject *text) 729 { 730 return _PyUnicode_EncodeUTF32(text, 731 PyBytes_AS_STRING(self->errors), 1); 732 } 733 734 static PyObject * 735 utf32le_encode(textio *self, PyObject *text) 736 { 737 return _PyUnicode_EncodeUTF32(text, 738 PyBytes_AS_STRING(self->errors), -1); 739 } 740 741 static PyObject * 742 utf32_encode(textio *self, PyObject *text) 743 { 744 if (!self->encoding_start_of_stream) { 745 /* Skip the BOM and use native byte ordering */ 746 #if PY_BIG_ENDIAN 747 return utf32be_encode(self, text); 748 #else 749 return utf32le_encode(self, text); 750 #endif 751 } 752 return _PyUnicode_EncodeUTF32(text, 753 PyBytes_AS_STRING(self->errors), 0); 754 } 755 756 static PyObject * 757 utf8_encode(textio *self, PyObject *text) 758 { 759 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors)); 760 } 761 762 static PyObject * 763 latin1_encode(textio *self, PyObject *text) 764 { 765 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors)); 766 } 767 768 /* Map normalized encoding names onto the specialized encoding funcs */ 769 770 typedef struct { 771 const char *name; 772 encodefunc_t encodefunc; 773 } encodefuncentry; 774 775 static const encodefuncentry encodefuncs[] = { 776 {"ascii", (encodefunc_t) ascii_encode}, 777 {"iso8859-1", (encodefunc_t) latin1_encode}, 778 {"utf-8", (encodefunc_t) utf8_encode}, 779 {"utf-16-be", (encodefunc_t) utf16be_encode}, 780 {"utf-16-le", (encodefunc_t) utf16le_encode}, 781 {"utf-16", (encodefunc_t) utf16_encode}, 782 {"utf-32-be", (encodefunc_t) utf32be_encode}, 783 {"utf-32-le", (encodefunc_t) utf32le_encode}, 784 {"utf-32", (encodefunc_t) utf32_encode}, 785 {NULL, NULL} 786 }; 787 788 789 /*[clinic input] 790 _io.TextIOWrapper.__init__ 791 buffer: object 792 encoding: str(accept={str, NoneType}) = NULL 793 errors: str(accept={str, NoneType}) = NULL 794 newline: str(accept={str, NoneType}) = NULL 795 line_buffering: int(c_default="0") = False 796 write_through: int(c_default="0") = False 797 798 Character and line based layer over a BufferedIOBase object, buffer. 799 800 encoding gives the name of the encoding that the stream will be 801 decoded or encoded with. It defaults to locale.getpreferredencoding(False). 802 803 errors determines the strictness of encoding and decoding (see 804 help(codecs.Codec) or the documentation for codecs.register) and 805 defaults to "strict". 806 807 newline controls how line endings are handled. It can be None, '', 808 '\n', '\r', and '\r\n'. It works as follows: 809 810 * On input, if newline is None, universal newlines mode is 811 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 812 these are translated into '\n' before being returned to the 813 caller. If it is '', universal newline mode is enabled, but line 814 endings are returned to the caller untranslated. If it has any of 815 the other legal values, input lines are only terminated by the given 816 string, and the line ending is returned to the caller untranslated. 817 818 * On output, if newline is None, any '\n' characters written are 819 translated to the system default line separator, os.linesep. If 820 newline is '' or '\n', no translation takes place. If newline is any 821 of the other legal values, any '\n' characters written are translated 822 to the given string. 823 824 If line_buffering is True, a call to flush is implied when a call to 825 write contains a newline character. 826 [clinic start generated code]*/ 827 828 static int 829 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, 830 const char *encoding, const char *errors, 831 const char *newline, int line_buffering, 832 int write_through) 833 /*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/ 834 { 835 PyObject *raw, *codec_info = NULL; 836 _PyIO_State *state = NULL; 837 PyObject *res; 838 int r; 839 840 self->ok = 0; 841 self->detached = 0; 842 843 if (newline && newline[0] != '\0' 844 && !(newline[0] == '\n' && newline[1] == '\0') 845 && !(newline[0] == '\r' && newline[1] == '\0') 846 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { 847 PyErr_Format(PyExc_ValueError, 848 "illegal newline value: %s", newline); 849 return -1; 850 } 851 852 Py_CLEAR(self->buffer); 853 Py_CLEAR(self->encoding); 854 Py_CLEAR(self->encoder); 855 Py_CLEAR(self->decoder); 856 Py_CLEAR(self->readnl); 857 Py_CLEAR(self->decoded_chars); 858 Py_CLEAR(self->pending_bytes); 859 Py_CLEAR(self->snapshot); 860 Py_CLEAR(self->errors); 861 Py_CLEAR(self->raw); 862 self->decoded_chars_used = 0; 863 self->pending_bytes_count = 0; 864 self->encodefunc = NULL; 865 self->b2cratio = 0.0; 866 867 if (encoding == NULL) { 868 /* Try os.device_encoding(fileno) */ 869 PyObject *fileno; 870 state = IO_STATE(); 871 if (state == NULL) 872 goto error; 873 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL); 874 /* Ignore only AttributeError and UnsupportedOperation */ 875 if (fileno == NULL) { 876 if (PyErr_ExceptionMatches(PyExc_AttributeError) || 877 PyErr_ExceptionMatches(state->unsupported_operation)) { 878 PyErr_Clear(); 879 } 880 else { 881 goto error; 882 } 883 } 884 else { 885 int fd = _PyLong_AsInt(fileno); 886 Py_DECREF(fileno); 887 if (fd == -1 && PyErr_Occurred()) { 888 goto error; 889 } 890 891 self->encoding = _Py_device_encoding(fd); 892 if (self->encoding == NULL) 893 goto error; 894 else if (!PyUnicode_Check(self->encoding)) 895 Py_CLEAR(self->encoding); 896 } 897 } 898 if (encoding == NULL && self->encoding == NULL) { 899 PyObject *locale_module = _PyIO_get_locale_module(state); 900 if (locale_module == NULL) 901 goto catch_ImportError; 902 self->encoding = _PyObject_CallMethodId( 903 locale_module, &PyId_getpreferredencoding, "O", Py_False); 904 Py_DECREF(locale_module); 905 if (self->encoding == NULL) { 906 catch_ImportError: 907 /* 908 Importing locale can raise an ImportError because of 909 _functools, and locale.getpreferredencoding can raise an 910 ImportError if _locale is not available. These will happen 911 during module building. 912 */ 913 if (PyErr_ExceptionMatches(PyExc_ImportError)) { 914 PyErr_Clear(); 915 self->encoding = PyUnicode_FromString("ascii"); 916 } 917 else 918 goto error; 919 } 920 else if (!PyUnicode_Check(self->encoding)) 921 Py_CLEAR(self->encoding); 922 } 923 if (self->encoding != NULL) { 924 encoding = PyUnicode_AsUTF8(self->encoding); 925 if (encoding == NULL) 926 goto error; 927 } 928 else if (encoding != NULL) { 929 self->encoding = PyUnicode_FromString(encoding); 930 if (self->encoding == NULL) 931 goto error; 932 } 933 else { 934 PyErr_SetString(PyExc_IOError, 935 "could not determine default encoding"); 936 } 937 938 /* Check we have been asked for a real text encoding */ 939 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()"); 940 if (codec_info == NULL) { 941 Py_CLEAR(self->encoding); 942 goto error; 943 } 944 945 /* XXX: Failures beyond this point have the potential to leak elements 946 * of the partially constructed object (like self->encoding) 947 */ 948 949 if (errors == NULL) 950 errors = "strict"; 951 self->errors = PyBytes_FromString(errors); 952 if (self->errors == NULL) 953 goto error; 954 955 self->chunk_size = 8192; 956 self->readuniversal = (newline == NULL || newline[0] == '\0'); 957 self->line_buffering = line_buffering; 958 self->write_through = write_through; 959 self->readtranslate = (newline == NULL); 960 if (newline) { 961 self->readnl = PyUnicode_FromString(newline); 962 if (self->readnl == NULL) 963 goto error; 964 } 965 self->writetranslate = (newline == NULL || newline[0] != '\0'); 966 if (!self->readuniversal && self->readnl) { 967 self->writenl = PyUnicode_AsUTF8(self->readnl); 968 if (self->writenl == NULL) 969 goto error; 970 if (!strcmp(self->writenl, "\n")) 971 self->writenl = NULL; 972 } 973 #ifdef MS_WINDOWS 974 else 975 self->writenl = "\r\n"; 976 #endif 977 978 /* Build the decoder object */ 979 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL); 980 if (res == NULL) 981 goto error; 982 r = PyObject_IsTrue(res); 983 Py_DECREF(res); 984 if (r == -1) 985 goto error; 986 if (r == 1) { 987 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, 988 errors); 989 if (self->decoder == NULL) 990 goto error; 991 992 if (self->readuniversal) { 993 PyObject *incrementalDecoder = PyObject_CallFunction( 994 (PyObject *)&PyIncrementalNewlineDecoder_Type, 995 "Oi", self->decoder, (int)self->readtranslate); 996 if (incrementalDecoder == NULL) 997 goto error; 998 Py_XSETREF(self->decoder, incrementalDecoder); 999 } 1000 } 1001 1002 /* Build the encoder object */ 1003 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL); 1004 if (res == NULL) 1005 goto error; 1006 r = PyObject_IsTrue(res); 1007 Py_DECREF(res); 1008 if (r == -1) 1009 goto error; 1010 if (r == 1) { 1011 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, 1012 errors); 1013 if (self->encoder == NULL) 1014 goto error; 1015 /* Get the normalized name of the codec */ 1016 res = _PyObject_GetAttrId(codec_info, &PyId_name); 1017 if (res == NULL) { 1018 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 1019 PyErr_Clear(); 1020 else 1021 goto error; 1022 } 1023 else if (PyUnicode_Check(res)) { 1024 const encodefuncentry *e = encodefuncs; 1025 while (e->name != NULL) { 1026 if (_PyUnicode_EqualToASCIIString(res, e->name)) { 1027 self->encodefunc = e->encodefunc; 1028 break; 1029 } 1030 e++; 1031 } 1032 } 1033 Py_XDECREF(res); 1034 } 1035 1036 /* Finished sorting out the codec details */ 1037 Py_CLEAR(codec_info); 1038 1039 self->buffer = buffer; 1040 Py_INCREF(buffer); 1041 1042 if (Py_TYPE(buffer) == &PyBufferedReader_Type || 1043 Py_TYPE(buffer) == &PyBufferedWriter_Type || 1044 Py_TYPE(buffer) == &PyBufferedRandom_Type) { 1045 raw = _PyObject_GetAttrId(buffer, &PyId_raw); 1046 /* Cache the raw FileIO object to speed up 'closed' checks */ 1047 if (raw == NULL) { 1048 if (PyErr_ExceptionMatches(PyExc_AttributeError)) 1049 PyErr_Clear(); 1050 else 1051 goto error; 1052 } 1053 else if (Py_TYPE(raw) == &PyFileIO_Type) 1054 self->raw = raw; 1055 else 1056 Py_DECREF(raw); 1057 } 1058 1059 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL); 1060 if (res == NULL) 1061 goto error; 1062 r = PyObject_IsTrue(res); 1063 Py_DECREF(res); 1064 if (r < 0) 1065 goto error; 1066 self->seekable = self->telling = r; 1067 1068 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1); 1069 1070 self->encoding_start_of_stream = 0; 1071 if (self->seekable && self->encoder) { 1072 PyObject *cookieObj; 1073 int cmp; 1074 1075 self->encoding_start_of_stream = 1; 1076 1077 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL); 1078 if (cookieObj == NULL) 1079 goto error; 1080 1081 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); 1082 Py_DECREF(cookieObj); 1083 if (cmp < 0) { 1084 goto error; 1085 } 1086 1087 if (cmp == 0) { 1088 self->encoding_start_of_stream = 0; 1089 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, 1090 _PyIO_zero, NULL); 1091 if (res == NULL) 1092 goto error; 1093 Py_DECREF(res); 1094 } 1095 } 1096 1097 self->ok = 1; 1098 return 0; 1099 1100 error: 1101 Py_XDECREF(codec_info); 1102 return -1; 1103 } 1104 1105 static int 1106 textiowrapper_clear(textio *self) 1107 { 1108 self->ok = 0; 1109 Py_CLEAR(self->buffer); 1110 Py_CLEAR(self->encoding); 1111 Py_CLEAR(self->encoder); 1112 Py_CLEAR(self->decoder); 1113 Py_CLEAR(self->readnl); 1114 Py_CLEAR(self->decoded_chars); 1115 Py_CLEAR(self->pending_bytes); 1116 Py_CLEAR(self->snapshot); 1117 Py_CLEAR(self->errors); 1118 Py_CLEAR(self->raw); 1119 1120 Py_CLEAR(self->dict); 1121 return 0; 1122 } 1123 1124 static void 1125 textiowrapper_dealloc(textio *self) 1126 { 1127 self->finalizing = 1; 1128 if (_PyIOBase_finalize((PyObject *) self) < 0) 1129 return; 1130 self->ok = 0; 1131 _PyObject_GC_UNTRACK(self); 1132 if (self->weakreflist != NULL) 1133 PyObject_ClearWeakRefs((PyObject *)self); 1134 textiowrapper_clear(self); 1135 Py_TYPE(self)->tp_free((PyObject *)self); 1136 } 1137 1138 static int 1139 textiowrapper_traverse(textio *self, visitproc visit, void *arg) 1140 { 1141 Py_VISIT(self->buffer); 1142 Py_VISIT(self->encoding); 1143 Py_VISIT(self->encoder); 1144 Py_VISIT(self->decoder); 1145 Py_VISIT(self->readnl); 1146 Py_VISIT(self->decoded_chars); 1147 Py_VISIT(self->pending_bytes); 1148 Py_VISIT(self->snapshot); 1149 Py_VISIT(self->errors); 1150 Py_VISIT(self->raw); 1151 1152 Py_VISIT(self->dict); 1153 return 0; 1154 } 1155 1156 static PyObject * 1157 textiowrapper_closed_get(textio *self, void *context); 1158 1159 /* This macro takes some shortcuts to make the common case faster. */ 1160 #define CHECK_CLOSED(self) \ 1161 do { \ 1162 int r; \ 1163 PyObject *_res; \ 1164 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \ 1165 if (self->raw != NULL) \ 1166 r = _PyFileIO_closed(self->raw); \ 1167 else { \ 1168 _res = textiowrapper_closed_get(self, NULL); \ 1169 if (_res == NULL) \ 1170 return NULL; \ 1171 r = PyObject_IsTrue(_res); \ 1172 Py_DECREF(_res); \ 1173 if (r < 0) \ 1174 return NULL; \ 1175 } \ 1176 if (r > 0) { \ 1177 PyErr_SetString(PyExc_ValueError, \ 1178 "I/O operation on closed file."); \ 1179 return NULL; \ 1180 } \ 1181 } \ 1182 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \ 1183 return NULL; \ 1184 } while (0) 1185 1186 #define CHECK_INITIALIZED(self) \ 1187 if (self->ok <= 0) { \ 1188 PyErr_SetString(PyExc_ValueError, \ 1189 "I/O operation on uninitialized object"); \ 1190 return NULL; \ 1191 } 1192 1193 #define CHECK_ATTACHED(self) \ 1194 CHECK_INITIALIZED(self); \ 1195 if (self->detached) { \ 1196 PyErr_SetString(PyExc_ValueError, \ 1197 "underlying buffer has been detached"); \ 1198 return NULL; \ 1199 } 1200 1201 #define CHECK_ATTACHED_INT(self) \ 1202 if (self->ok <= 0) { \ 1203 PyErr_SetString(PyExc_ValueError, \ 1204 "I/O operation on uninitialized object"); \ 1205 return -1; \ 1206 } else if (self->detached) { \ 1207 PyErr_SetString(PyExc_ValueError, \ 1208 "underlying buffer has been detached"); \ 1209 return -1; \ 1210 } 1211 1212 1213 /*[clinic input] 1214 _io.TextIOWrapper.detach 1215 [clinic start generated code]*/ 1216 1217 static PyObject * 1218 _io_TextIOWrapper_detach_impl(textio *self) 1219 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/ 1220 { 1221 PyObject *buffer, *res; 1222 CHECK_ATTACHED(self); 1223 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); 1224 if (res == NULL) 1225 return NULL; 1226 Py_DECREF(res); 1227 buffer = self->buffer; 1228 self->buffer = NULL; 1229 self->detached = 1; 1230 return buffer; 1231 } 1232 1233 /* Flush the internal write buffer. This doesn't explicitly flush the 1234 underlying buffered object, though. */ 1235 static int 1236 _textiowrapper_writeflush(textio *self) 1237 { 1238 PyObject *pending, *b, *ret; 1239 1240 if (self->pending_bytes == NULL) 1241 return 0; 1242 1243 pending = self->pending_bytes; 1244 Py_INCREF(pending); 1245 self->pending_bytes_count = 0; 1246 Py_CLEAR(self->pending_bytes); 1247 1248 b = _PyBytes_Join(_PyIO_empty_bytes, pending); 1249 Py_DECREF(pending); 1250 if (b == NULL) 1251 return -1; 1252 ret = NULL; 1253 do { 1254 ret = PyObject_CallMethodObjArgs(self->buffer, 1255 _PyIO_str_write, b, NULL); 1256 } while (ret == NULL && _PyIO_trap_eintr()); 1257 Py_DECREF(b); 1258 if (ret == NULL) 1259 return -1; 1260 Py_DECREF(ret); 1261 return 0; 1262 } 1263 1264 /*[clinic input] 1265 _io.TextIOWrapper.write 1266 text: unicode 1267 / 1268 [clinic start generated code]*/ 1269 1270 static PyObject * 1271 _io_TextIOWrapper_write_impl(textio *self, PyObject *text) 1272 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/ 1273 { 1274 PyObject *ret; 1275 PyObject *b; 1276 Py_ssize_t textlen; 1277 int haslf = 0; 1278 int needflush = 0, text_needflush = 0; 1279 1280 if (PyUnicode_READY(text) == -1) 1281 return NULL; 1282 1283 CHECK_ATTACHED(self); 1284 CHECK_CLOSED(self); 1285 1286 if (self->encoder == NULL) 1287 return _unsupported("not writable"); 1288 1289 Py_INCREF(text); 1290 1291 textlen = PyUnicode_GET_LENGTH(text); 1292 1293 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering) 1294 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1) 1295 haslf = 1; 1296 1297 if (haslf && self->writetranslate && self->writenl != NULL) { 1298 PyObject *newtext = _PyObject_CallMethodId( 1299 text, &PyId_replace, "ss", "\n", self->writenl); 1300 Py_DECREF(text); 1301 if (newtext == NULL) 1302 return NULL; 1303 text = newtext; 1304 } 1305 1306 if (self->write_through) 1307 text_needflush = 1; 1308 if (self->line_buffering && 1309 (haslf || 1310 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1)) 1311 needflush = 1; 1312 1313 /* XXX What if we were just reading? */ 1314 if (self->encodefunc != NULL) { 1315 b = (*self->encodefunc)((PyObject *) self, text); 1316 self->encoding_start_of_stream = 0; 1317 } 1318 else 1319 b = PyObject_CallMethodObjArgs(self->encoder, 1320 _PyIO_str_encode, text, NULL); 1321 Py_DECREF(text); 1322 if (b == NULL) 1323 return NULL; 1324 1325 if (self->pending_bytes == NULL) { 1326 self->pending_bytes = PyList_New(0); 1327 if (self->pending_bytes == NULL) { 1328 Py_DECREF(b); 1329 return NULL; 1330 } 1331 self->pending_bytes_count = 0; 1332 } 1333 if (PyList_Append(self->pending_bytes, b) < 0) { 1334 Py_DECREF(b); 1335 return NULL; 1336 } 1337 self->pending_bytes_count += PyBytes_GET_SIZE(b); 1338 Py_DECREF(b); 1339 if (self->pending_bytes_count > self->chunk_size || needflush || 1340 text_needflush) { 1341 if (_textiowrapper_writeflush(self) < 0) 1342 return NULL; 1343 } 1344 1345 if (needflush) { 1346 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL); 1347 if (ret == NULL) 1348 return NULL; 1349 Py_DECREF(ret); 1350 } 1351 1352 Py_CLEAR(self->snapshot); 1353 1354 if (self->decoder) { 1355 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL); 1356 if (ret == NULL) 1357 return NULL; 1358 Py_DECREF(ret); 1359 } 1360 1361 return PyLong_FromSsize_t(textlen); 1362 } 1363 1364 /* Steal a reference to chars and store it in the decoded_char buffer; 1365 */ 1366 static void 1367 textiowrapper_set_decoded_chars(textio *self, PyObject *chars) 1368 { 1369 Py_XSETREF(self->decoded_chars, chars); 1370 self->decoded_chars_used = 0; 1371 } 1372 1373 static PyObject * 1374 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) 1375 { 1376 PyObject *chars; 1377 Py_ssize_t avail; 1378 1379 if (self->decoded_chars == NULL) 1380 return PyUnicode_FromStringAndSize(NULL, 0); 1381 1382 /* decoded_chars is guaranteed to be "ready". */ 1383 avail = (PyUnicode_GET_LENGTH(self->decoded_chars) 1384 - self->decoded_chars_used); 1385 1386 assert(avail >= 0); 1387 1388 if (n < 0 || n > avail) 1389 n = avail; 1390 1391 if (self->decoded_chars_used > 0 || n < avail) { 1392 chars = PyUnicode_Substring(self->decoded_chars, 1393 self->decoded_chars_used, 1394 self->decoded_chars_used + n); 1395 if (chars == NULL) 1396 return NULL; 1397 } 1398 else { 1399 chars = self->decoded_chars; 1400 Py_INCREF(chars); 1401 } 1402 1403 self->decoded_chars_used += n; 1404 return chars; 1405 } 1406 1407 /* Read and decode the next chunk of data from the BufferedReader. 1408 */ 1409 static int 1410 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) 1411 { 1412 PyObject *dec_buffer = NULL; 1413 PyObject *dec_flags = NULL; 1414 PyObject *input_chunk = NULL; 1415 Py_buffer input_chunk_buf; 1416 PyObject *decoded_chars, *chunk_size; 1417 Py_ssize_t nbytes, nchars; 1418 int eof; 1419 1420 /* The return value is True unless EOF was reached. The decoded string is 1421 * placed in self._decoded_chars (replacing its previous value). The 1422 * entire input chunk is sent to the decoder, though some of it may remain 1423 * buffered in the decoder, yet to be converted. 1424 */ 1425 1426 if (self->decoder == NULL) { 1427 _unsupported("not readable"); 1428 return -1; 1429 } 1430 1431 if (self->telling) { 1432 /* To prepare for tell(), we need to snapshot a point in the file 1433 * where the decoder's input buffer is empty. 1434 */ 1435 1436 PyObject *state = PyObject_CallMethodObjArgs(self->decoder, 1437 _PyIO_str_getstate, NULL); 1438 if (state == NULL) 1439 return -1; 1440 /* Given this, we know there was a valid snapshot point 1441 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 1442 */ 1443 if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) { 1444 Py_DECREF(state); 1445 return -1; 1446 } 1447 1448 if (!PyBytes_Check(dec_buffer)) { 1449 PyErr_Format(PyExc_TypeError, 1450 "decoder getstate() should have returned a bytes " 1451 "object, not '%.200s'", 1452 Py_TYPE(dec_buffer)->tp_name); 1453 Py_DECREF(state); 1454 return -1; 1455 } 1456 Py_INCREF(dec_buffer); 1457 Py_INCREF(dec_flags); 1458 Py_DECREF(state); 1459 } 1460 1461 /* Read a chunk, decode it, and put the result in self._decoded_chars. */ 1462 if (size_hint > 0) { 1463 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint); 1464 } 1465 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); 1466 if (chunk_size == NULL) 1467 goto fail; 1468 1469 input_chunk = PyObject_CallMethodObjArgs(self->buffer, 1470 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read), 1471 chunk_size, NULL); 1472 Py_DECREF(chunk_size); 1473 if (input_chunk == NULL) 1474 goto fail; 1475 1476 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { 1477 PyErr_Format(PyExc_TypeError, 1478 "underlying %s() should have returned a bytes-like object, " 1479 "not '%.200s'", (self->has_read1 ? "read1": "read"), 1480 Py_TYPE(input_chunk)->tp_name); 1481 goto fail; 1482 } 1483 1484 nbytes = input_chunk_buf.len; 1485 eof = (nbytes == 0); 1486 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { 1487 decoded_chars = _PyIncrementalNewlineDecoder_decode( 1488 self->decoder, input_chunk, eof); 1489 } 1490 else { 1491 decoded_chars = PyObject_CallMethodObjArgs(self->decoder, 1492 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); 1493 } 1494 PyBuffer_Release(&input_chunk_buf); 1495 1496 if (check_decoded(decoded_chars) < 0) 1497 goto fail; 1498 textiowrapper_set_decoded_chars(self, decoded_chars); 1499 nchars = PyUnicode_GET_LENGTH(decoded_chars); 1500 if (nchars > 0) 1501 self->b2cratio = (double) nbytes / nchars; 1502 else 1503 self->b2cratio = 0.0; 1504 if (nchars > 0) 1505 eof = 0; 1506 1507 if (self->telling) { 1508 /* At the snapshot point, len(dec_buffer) bytes before the read, the 1509 * next input to be decoded is dec_buffer + input_chunk. 1510 */ 1511 PyObject *next_input = dec_buffer; 1512 PyBytes_Concat(&next_input, input_chunk); 1513 if (next_input == NULL) { 1514 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ 1515 goto fail; 1516 } 1517 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input)); 1518 } 1519 Py_DECREF(input_chunk); 1520 1521 return (eof == 0); 1522 1523 fail: 1524 Py_XDECREF(dec_buffer); 1525 Py_XDECREF(dec_flags); 1526 Py_XDECREF(input_chunk); 1527 return -1; 1528 } 1529 1530 /*[clinic input] 1531 _io.TextIOWrapper.read 1532 size as n: io_ssize_t = -1 1533 / 1534 [clinic start generated code]*/ 1535 1536 static PyObject * 1537 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) 1538 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/ 1539 { 1540 PyObject *result = NULL, *chunks = NULL; 1541 1542 CHECK_ATTACHED(self); 1543 CHECK_CLOSED(self); 1544 1545 if (self->decoder == NULL) 1546 return _unsupported("not readable"); 1547 1548 if (_textiowrapper_writeflush(self) < 0) 1549 return NULL; 1550 1551 if (n < 0) { 1552 /* Read everything */ 1553 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL); 1554 PyObject *decoded; 1555 if (bytes == NULL) 1556 goto fail; 1557 1558 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) 1559 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, 1560 bytes, 1); 1561 else 1562 decoded = PyObject_CallMethodObjArgs( 1563 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL); 1564 Py_DECREF(bytes); 1565 if (check_decoded(decoded) < 0) 1566 goto fail; 1567 1568 result = textiowrapper_get_decoded_chars(self, -1); 1569 1570 if (result == NULL) { 1571 Py_DECREF(decoded); 1572 return NULL; 1573 } 1574 1575 PyUnicode_AppendAndDel(&result, decoded); 1576 if (result == NULL) 1577 goto fail; 1578 1579 Py_CLEAR(self->snapshot); 1580 return result; 1581 } 1582 else { 1583 int res = 1; 1584 Py_ssize_t remaining = n; 1585 1586 result = textiowrapper_get_decoded_chars(self, n); 1587 if (result == NULL) 1588 goto fail; 1589 if (PyUnicode_READY(result) == -1) 1590 goto fail; 1591 remaining -= PyUnicode_GET_LENGTH(result); 1592 1593 /* Keep reading chunks until we have n characters to return */ 1594 while (remaining > 0) { 1595 res = textiowrapper_read_chunk(self, remaining); 1596 if (res < 0) { 1597 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() 1598 when EINTR occurs so we needn't do it ourselves. */ 1599 if (_PyIO_trap_eintr()) { 1600 continue; 1601 } 1602 goto fail; 1603 } 1604 if (res == 0) /* EOF */ 1605 break; 1606 if (chunks == NULL) { 1607 chunks = PyList_New(0); 1608 if (chunks == NULL) 1609 goto fail; 1610 } 1611 if (PyUnicode_GET_LENGTH(result) > 0 && 1612 PyList_Append(chunks, result) < 0) 1613 goto fail; 1614 Py_DECREF(result); 1615 result = textiowrapper_get_decoded_chars(self, remaining); 1616 if (result == NULL) 1617 goto fail; 1618 remaining -= PyUnicode_GET_LENGTH(result); 1619 } 1620 if (chunks != NULL) { 1621 if (result != NULL && PyList_Append(chunks, result) < 0) 1622 goto fail; 1623 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks)); 1624 if (result == NULL) 1625 goto fail; 1626 Py_CLEAR(chunks); 1627 } 1628 return result; 1629 } 1630 fail: 1631 Py_XDECREF(result); 1632 Py_XDECREF(chunks); 1633 return NULL; 1634 } 1635 1636 1637 /* NOTE: `end` must point to the real end of the Py_UCS4 storage, 1638 that is to the NUL character. Otherwise the function will produce 1639 incorrect results. */ 1640 static const char * 1641 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch) 1642 { 1643 if (kind == PyUnicode_1BYTE_KIND) { 1644 assert(ch < 256); 1645 return (char *) memchr((void *) s, (char) ch, end - s); 1646 } 1647 for (;;) { 1648 while (PyUnicode_READ(kind, s, 0) > ch) 1649 s += kind; 1650 if (PyUnicode_READ(kind, s, 0) == ch) 1651 return s; 1652 if (s == end) 1653 return NULL; 1654 s += kind; 1655 } 1656 } 1657 1658 Py_ssize_t 1659 _PyIO_find_line_ending( 1660 int translated, int universal, PyObject *readnl, 1661 int kind, const char *start, const char *end, Py_ssize_t *consumed) 1662 { 1663 Py_ssize_t len = ((char*)end - (char*)start)/kind; 1664 1665 if (translated) { 1666 /* Newlines are already translated, only search for \n */ 1667 const char *pos = find_control_char(kind, start, end, '\n'); 1668 if (pos != NULL) 1669 return (pos - start)/kind + 1; 1670 else { 1671 *consumed = len; 1672 return -1; 1673 } 1674 } 1675 else if (universal) { 1676 /* Universal newline search. Find any of \r, \r\n, \n 1677 * The decoder ensures that \r\n are not split in two pieces 1678 */ 1679 const char *s = start; 1680 for (;;) { 1681 Py_UCS4 ch; 1682 /* Fast path for non-control chars. The loop always ends 1683 since the Unicode string is NUL-terminated. */ 1684 while (PyUnicode_READ(kind, s, 0) > '\r') 1685 s += kind; 1686 if (s >= end) { 1687 *consumed = len; 1688 return -1; 1689 } 1690 ch = PyUnicode_READ(kind, s, 0); 1691 s += kind; 1692 if (ch == '\n') 1693 return (s - start)/kind; 1694 if (ch == '\r') { 1695 if (PyUnicode_READ(kind, s, 0) == '\n') 1696 return (s - start)/kind + 1; 1697 else 1698 return (s - start)/kind; 1699 } 1700 } 1701 } 1702 else { 1703 /* Non-universal mode. */ 1704 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); 1705 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); 1706 /* Assume that readnl is an ASCII character. */ 1707 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); 1708 if (readnl_len == 1) { 1709 const char *pos = find_control_char(kind, start, end, nl[0]); 1710 if (pos != NULL) 1711 return (pos - start)/kind + 1; 1712 *consumed = len; 1713 return -1; 1714 } 1715 else { 1716 const char *s = start; 1717 const char *e = end - (readnl_len - 1)*kind; 1718 const char *pos; 1719 if (e < s) 1720 e = s; 1721 while (s < e) { 1722 Py_ssize_t i; 1723 const char *pos = find_control_char(kind, s, end, nl[0]); 1724 if (pos == NULL || pos >= e) 1725 break; 1726 for (i = 1; i < readnl_len; i++) { 1727 if (PyUnicode_READ(kind, pos, i) != nl[i]) 1728 break; 1729 } 1730 if (i == readnl_len) 1731 return (pos - start)/kind + readnl_len; 1732 s = pos + kind; 1733 } 1734 pos = find_control_char(kind, e, end, nl[0]); 1735 if (pos == NULL) 1736 *consumed = len; 1737 else 1738 *consumed = (pos - start)/kind; 1739 return -1; 1740 } 1741 } 1742 } 1743 1744 static PyObject * 1745 _textiowrapper_readline(textio *self, Py_ssize_t limit) 1746 { 1747 PyObject *line = NULL, *chunks = NULL, *remaining = NULL; 1748 Py_ssize_t start, endpos, chunked, offset_to_buffer; 1749 int res; 1750 1751 CHECK_CLOSED(self); 1752 1753 if (_textiowrapper_writeflush(self) < 0) 1754 return NULL; 1755 1756 chunked = 0; 1757 1758 while (1) { 1759 char *ptr; 1760 Py_ssize_t line_len; 1761 int kind; 1762 Py_ssize_t consumed = 0; 1763 1764 /* First, get some data if necessary */ 1765 res = 1; 1766 while (!self->decoded_chars || 1767 !PyUnicode_GET_LENGTH(self->decoded_chars)) { 1768 res = textiowrapper_read_chunk(self, 0); 1769 if (res < 0) { 1770 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() 1771 when EINTR occurs so we needn't do it ourselves. */ 1772 if (_PyIO_trap_eintr()) { 1773 continue; 1774 } 1775 goto error; 1776 } 1777 if (res == 0) 1778 break; 1779 } 1780 if (res == 0) { 1781 /* end of file */ 1782 textiowrapper_set_decoded_chars(self, NULL); 1783 Py_CLEAR(self->snapshot); 1784 start = endpos = offset_to_buffer = 0; 1785 break; 1786 } 1787 1788 if (remaining == NULL) { 1789 line = self->decoded_chars; 1790 start = self->decoded_chars_used; 1791 offset_to_buffer = 0; 1792 Py_INCREF(line); 1793 } 1794 else { 1795 assert(self->decoded_chars_used == 0); 1796 line = PyUnicode_Concat(remaining, self->decoded_chars); 1797 start = 0; 1798 offset_to_buffer = PyUnicode_GET_LENGTH(remaining); 1799 Py_CLEAR(remaining); 1800 if (line == NULL) 1801 goto error; 1802 if (PyUnicode_READY(line) == -1) 1803 goto error; 1804 } 1805 1806 ptr = PyUnicode_DATA(line); 1807 line_len = PyUnicode_GET_LENGTH(line); 1808 kind = PyUnicode_KIND(line); 1809 1810 endpos = _PyIO_find_line_ending( 1811 self->readtranslate, self->readuniversal, self->readnl, 1812 kind, 1813 ptr + kind * start, 1814 ptr + kind * line_len, 1815 &consumed); 1816 if (endpos >= 0) { 1817 endpos += start; 1818 if (limit >= 0 && (endpos - start) + chunked >= limit) 1819 endpos = start + limit - chunked; 1820 break; 1821 } 1822 1823 /* We can put aside up to `endpos` */ 1824 endpos = consumed + start; 1825 if (limit >= 0 && (endpos - start) + chunked >= limit) { 1826 /* Didn't find line ending, but reached length limit */ 1827 endpos = start + limit - chunked; 1828 break; 1829 } 1830 1831 if (endpos > start) { 1832 /* No line ending seen yet - put aside current data */ 1833 PyObject *s; 1834 if (chunks == NULL) { 1835 chunks = PyList_New(0); 1836 if (chunks == NULL) 1837 goto error; 1838 } 1839 s = PyUnicode_Substring(line, start, endpos); 1840 if (s == NULL) 1841 goto error; 1842 if (PyList_Append(chunks, s) < 0) { 1843 Py_DECREF(s); 1844 goto error; 1845 } 1846 chunked += PyUnicode_GET_LENGTH(s); 1847 Py_DECREF(s); 1848 } 1849 /* There may be some remaining bytes we'll have to prepend to the 1850 next chunk of data */ 1851 if (endpos < line_len) { 1852 remaining = PyUnicode_Substring(line, endpos, line_len); 1853 if (remaining == NULL) 1854 goto error; 1855 } 1856 Py_CLEAR(line); 1857 /* We have consumed the buffer */ 1858 textiowrapper_set_decoded_chars(self, NULL); 1859 } 1860 1861 if (line != NULL) { 1862 /* Our line ends in the current buffer */ 1863 self->decoded_chars_used = endpos - offset_to_buffer; 1864 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { 1865 PyObject *s = PyUnicode_Substring(line, start, endpos); 1866 Py_CLEAR(line); 1867 if (s == NULL) 1868 goto error; 1869 line = s; 1870 } 1871 } 1872 if (remaining != NULL) { 1873 if (chunks == NULL) { 1874 chunks = PyList_New(0); 1875 if (chunks == NULL) 1876 goto error; 1877 } 1878 if (PyList_Append(chunks, remaining) < 0) 1879 goto error; 1880 Py_CLEAR(remaining); 1881 } 1882 if (chunks != NULL) { 1883 if (line != NULL) { 1884 if (PyList_Append(chunks, line) < 0) 1885 goto error; 1886 Py_DECREF(line); 1887 } 1888 line = PyUnicode_Join(_PyIO_empty_str, chunks); 1889 if (line == NULL) 1890 goto error; 1891 Py_CLEAR(chunks); 1892 } 1893 if (line == NULL) { 1894 Py_INCREF(_PyIO_empty_str); 1895 line = _PyIO_empty_str; 1896 } 1897 1898 return line; 1899 1900 error: 1901 Py_XDECREF(chunks); 1902 Py_XDECREF(remaining); 1903 Py_XDECREF(line); 1904 return NULL; 1905 } 1906 1907 /*[clinic input] 1908 _io.TextIOWrapper.readline 1909 size: Py_ssize_t = -1 1910 / 1911 [clinic start generated code]*/ 1912 1913 static PyObject * 1914 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size) 1915 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/ 1916 { 1917 CHECK_ATTACHED(self); 1918 return _textiowrapper_readline(self, size); 1919 } 1920 1921 /* Seek and Tell */ 1922 1923 typedef struct { 1924 Py_off_t start_pos; 1925 int dec_flags; 1926 int bytes_to_feed; 1927 int chars_to_skip; 1928 char need_eof; 1929 } cookie_type; 1930 1931 /* 1932 To speed up cookie packing/unpacking, we store the fields in a temporary 1933 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.). 1934 The following macros define at which offsets in the intermediary byte 1935 string the various CookieStruct fields will be stored. 1936 */ 1937 1938 #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char)) 1939 1940 #if PY_BIG_ENDIAN 1941 /* We want the least significant byte of start_pos to also be the least 1942 significant byte of the cookie, which means that in big-endian mode we 1943 must copy the fields in reverse order. */ 1944 1945 # define OFF_START_POS (sizeof(char) + 3 * sizeof(int)) 1946 # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int)) 1947 # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int)) 1948 # define OFF_CHARS_TO_SKIP (sizeof(char)) 1949 # define OFF_NEED_EOF 0 1950 1951 #else 1952 /* Little-endian mode: the least significant byte of start_pos will 1953 naturally end up the least significant byte of the cookie. */ 1954 1955 # define OFF_START_POS 0 1956 # define OFF_DEC_FLAGS (sizeof(Py_off_t)) 1957 # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int)) 1958 # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int)) 1959 # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int)) 1960 1961 #endif 1962 1963 static int 1964 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) 1965 { 1966 unsigned char buffer[COOKIE_BUF_LEN]; 1967 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); 1968 if (cookieLong == NULL) 1969 return -1; 1970 1971 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), 1972 PY_LITTLE_ENDIAN, 0) < 0) { 1973 Py_DECREF(cookieLong); 1974 return -1; 1975 } 1976 Py_DECREF(cookieLong); 1977 1978 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); 1979 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); 1980 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed)); 1981 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); 1982 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); 1983 1984 return 0; 1985 } 1986 1987 static PyObject * 1988 textiowrapper_build_cookie(cookie_type *cookie) 1989 { 1990 unsigned char buffer[COOKIE_BUF_LEN]; 1991 1992 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); 1993 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); 1994 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); 1995 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); 1996 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); 1997 1998 return _PyLong_FromByteArray(buffer, sizeof(buffer), 1999 PY_LITTLE_ENDIAN, 0); 2000 } 2001 2002 static int 2003 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) 2004 { 2005 PyObject *res; 2006 /* When seeking to the start of the stream, we call decoder.reset() 2007 rather than decoder.getstate(). 2008 This is for a few decoders such as utf-16 for which the state value 2009 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of 2010 utf-16, that we are expecting a BOM). 2011 */ 2012 if (cookie->start_pos == 0 && cookie->dec_flags == 0) 2013 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); 2014 else 2015 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, 2016 "((yi))", "", cookie->dec_flags); 2017 if (res == NULL) 2018 return -1; 2019 Py_DECREF(res); 2020 return 0; 2021 } 2022 2023 static int 2024 _textiowrapper_encoder_reset(textio *self, int start_of_stream) 2025 { 2026 PyObject *res; 2027 if (start_of_stream) { 2028 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); 2029 self->encoding_start_of_stream = 1; 2030 } 2031 else { 2032 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate, 2033 _PyIO_zero, NULL); 2034 self->encoding_start_of_stream = 0; 2035 } 2036 if (res == NULL) 2037 return -1; 2038 Py_DECREF(res); 2039 return 0; 2040 } 2041 2042 static int 2043 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) 2044 { 2045 /* Same as _textiowrapper_decoder_setstate() above. */ 2046 return _textiowrapper_encoder_reset( 2047 self, cookie->start_pos == 0 && cookie->dec_flags == 0); 2048 } 2049 2050 /*[clinic input] 2051 _io.TextIOWrapper.seek 2052 cookie as cookieObj: object 2053 whence: int = 0 2054 / 2055 [clinic start generated code]*/ 2056 2057 static PyObject * 2058 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) 2059 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/ 2060 { 2061 PyObject *posobj; 2062 cookie_type cookie; 2063 PyObject *res; 2064 int cmp; 2065 2066 CHECK_ATTACHED(self); 2067 CHECK_CLOSED(self); 2068 2069 Py_INCREF(cookieObj); 2070 2071 if (!self->seekable) { 2072 _unsupported("underlying stream is not seekable"); 2073 goto fail; 2074 } 2075 2076 if (whence == 1) { 2077 /* seek relative to current position */ 2078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); 2079 if (cmp < 0) 2080 goto fail; 2081 2082 if (cmp == 0) { 2083 _unsupported("can't do nonzero cur-relative seeks"); 2084 goto fail; 2085 } 2086 2087 /* Seeking to the current position should attempt to 2088 * sync the underlying buffer with the current position. 2089 */ 2090 Py_DECREF(cookieObj); 2091 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL); 2092 if (cookieObj == NULL) 2093 goto fail; 2094 } 2095 else if (whence == 2) { 2096 /* seek relative to end of file */ 2097 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ); 2098 if (cmp < 0) 2099 goto fail; 2100 2101 if (cmp == 0) { 2102 _unsupported("can't do nonzero end-relative seeks"); 2103 goto fail; 2104 } 2105 2106 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); 2107 if (res == NULL) 2108 goto fail; 2109 Py_DECREF(res); 2110 2111 textiowrapper_set_decoded_chars(self, NULL); 2112 Py_CLEAR(self->snapshot); 2113 if (self->decoder) { 2114 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL); 2115 if (res == NULL) 2116 goto fail; 2117 Py_DECREF(res); 2118 } 2119 2120 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2); 2121 Py_CLEAR(cookieObj); 2122 if (res == NULL) 2123 goto fail; 2124 if (self->encoder) { 2125 /* If seek() == 0, we are at the start of stream, otherwise not */ 2126 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ); 2127 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) { 2128 Py_DECREF(res); 2129 goto fail; 2130 } 2131 } 2132 return res; 2133 } 2134 else if (whence != 0) { 2135 PyErr_Format(PyExc_ValueError, 2136 "invalid whence (%d, should be 0, 1 or 2)", whence); 2137 goto fail; 2138 } 2139 2140 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT); 2141 if (cmp < 0) 2142 goto fail; 2143 2144 if (cmp == 1) { 2145 PyErr_Format(PyExc_ValueError, 2146 "negative seek position %R", cookieObj); 2147 goto fail; 2148 } 2149 2150 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); 2151 if (res == NULL) 2152 goto fail; 2153 Py_DECREF(res); 2154 2155 /* The strategy of seek() is to go back to the safe start point 2156 * and replay the effect of read(chars_to_skip) from there. 2157 */ 2158 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0) 2159 goto fail; 2160 2161 /* Seek back to the safe start point. */ 2162 posobj = PyLong_FromOff_t(cookie.start_pos); 2163 if (posobj == NULL) 2164 goto fail; 2165 res = PyObject_CallMethodObjArgs(self->buffer, 2166 _PyIO_str_seek, posobj, NULL); 2167 Py_DECREF(posobj); 2168 if (res == NULL) 2169 goto fail; 2170 Py_DECREF(res); 2171 2172 textiowrapper_set_decoded_chars(self, NULL); 2173 Py_CLEAR(self->snapshot); 2174 2175 /* Restore the decoder to its state from the safe start point. */ 2176 if (self->decoder) { 2177 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2178 goto fail; 2179 } 2180 2181 if (cookie.chars_to_skip) { 2182 /* Just like _read_chunk, feed the decoder and save a snapshot. */ 2183 PyObject *input_chunk = _PyObject_CallMethodId( 2184 self->buffer, &PyId_read, "i", cookie.bytes_to_feed); 2185 PyObject *decoded; 2186 2187 if (input_chunk == NULL) 2188 goto fail; 2189 2190 if (!PyBytes_Check(input_chunk)) { 2191 PyErr_Format(PyExc_TypeError, 2192 "underlying read() should have returned a bytes " 2193 "object, not '%.200s'", 2194 Py_TYPE(input_chunk)->tp_name); 2195 Py_DECREF(input_chunk); 2196 goto fail; 2197 } 2198 2199 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); 2200 if (self->snapshot == NULL) { 2201 Py_DECREF(input_chunk); 2202 goto fail; 2203 } 2204 2205 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode, 2206 "Oi", input_chunk, (int)cookie.need_eof); 2207 2208 if (check_decoded(decoded) < 0) 2209 goto fail; 2210 2211 textiowrapper_set_decoded_chars(self, decoded); 2212 2213 /* Skip chars_to_skip of the decoded characters. */ 2214 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) { 2215 PyErr_SetString(PyExc_IOError, "can't restore logical file position"); 2216 goto fail; 2217 } 2218 self->decoded_chars_used = cookie.chars_to_skip; 2219 } 2220 else { 2221 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, ""); 2222 if (self->snapshot == NULL) 2223 goto fail; 2224 } 2225 2226 /* Finally, reset the encoder (merely useful for proper BOM handling) */ 2227 if (self->encoder) { 2228 if (_textiowrapper_encoder_setstate(self, &cookie) < 0) 2229 goto fail; 2230 } 2231 return cookieObj; 2232 fail: 2233 Py_XDECREF(cookieObj); 2234 return NULL; 2235 2236 } 2237 2238 /*[clinic input] 2239 _io.TextIOWrapper.tell 2240 [clinic start generated code]*/ 2241 2242 static PyObject * 2243 _io_TextIOWrapper_tell_impl(textio *self) 2244 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/ 2245 { 2246 PyObject *res; 2247 PyObject *posobj = NULL; 2248 cookie_type cookie = {0,0,0,0,0}; 2249 PyObject *next_input; 2250 Py_ssize_t chars_to_skip, chars_decoded; 2251 Py_ssize_t skip_bytes, skip_back; 2252 PyObject *saved_state = NULL; 2253 char *input, *input_end; 2254 Py_ssize_t dec_buffer_len; 2255 int dec_flags; 2256 2257 CHECK_ATTACHED(self); 2258 CHECK_CLOSED(self); 2259 2260 if (!self->seekable) { 2261 _unsupported("underlying stream is not seekable"); 2262 goto fail; 2263 } 2264 if (!self->telling) { 2265 PyErr_SetString(PyExc_IOError, 2266 "telling position disabled by next() call"); 2267 goto fail; 2268 } 2269 2270 if (_textiowrapper_writeflush(self) < 0) 2271 return NULL; 2272 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); 2273 if (res == NULL) 2274 goto fail; 2275 Py_DECREF(res); 2276 2277 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL); 2278 if (posobj == NULL) 2279 goto fail; 2280 2281 if (self->decoder == NULL || self->snapshot == NULL) { 2282 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0); 2283 return posobj; 2284 } 2285 2286 #if defined(HAVE_LARGEFILE_SUPPORT) 2287 cookie.start_pos = PyLong_AsLongLong(posobj); 2288 #else 2289 cookie.start_pos = PyLong_AsLong(posobj); 2290 #endif 2291 Py_DECREF(posobj); 2292 if (PyErr_Occurred()) 2293 goto fail; 2294 2295 /* Skip backward to the snapshot point (see _read_chunk). */ 2296 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input)) 2297 goto fail; 2298 2299 assert (PyBytes_Check(next_input)); 2300 2301 cookie.start_pos -= PyBytes_GET_SIZE(next_input); 2302 2303 /* How many decoded characters have been used up since the snapshot? */ 2304 if (self->decoded_chars_used == 0) { 2305 /* We haven't moved from the snapshot point. */ 2306 return textiowrapper_build_cookie(&cookie); 2307 } 2308 2309 chars_to_skip = self->decoded_chars_used; 2310 2311 /* Decoder state will be restored at the end */ 2312 saved_state = PyObject_CallMethodObjArgs(self->decoder, 2313 _PyIO_str_getstate, NULL); 2314 if (saved_state == NULL) 2315 goto fail; 2316 2317 #define DECODER_GETSTATE() do { \ 2318 PyObject *dec_buffer; \ 2319 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \ 2320 _PyIO_str_getstate, NULL); \ 2321 if (_state == NULL) \ 2322 goto fail; \ 2323 if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \ 2324 Py_DECREF(_state); \ 2325 goto fail; \ 2326 } \ 2327 if (!PyBytes_Check(dec_buffer)) { \ 2328 PyErr_Format(PyExc_TypeError, \ 2329 "decoder getstate() should have returned a bytes " \ 2330 "object, not '%.200s'", \ 2331 Py_TYPE(dec_buffer)->tp_name); \ 2332 Py_DECREF(_state); \ 2333 goto fail; \ 2334 } \ 2335 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \ 2336 Py_DECREF(_state); \ 2337 } while (0) 2338 2339 #define DECODER_DECODE(start, len, res) do { \ 2340 PyObject *_decoded = _PyObject_CallMethodId( \ 2341 self->decoder, &PyId_decode, "y#", start, len); \ 2342 if (check_decoded(_decoded) < 0) \ 2343 goto fail; \ 2344 res = PyUnicode_GET_LENGTH(_decoded); \ 2345 Py_DECREF(_decoded); \ 2346 } while (0) 2347 2348 /* Fast search for an acceptable start point, close to our 2349 current pos */ 2350 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); 2351 skip_back = 1; 2352 assert(skip_back <= PyBytes_GET_SIZE(next_input)); 2353 input = PyBytes_AS_STRING(next_input); 2354 while (skip_bytes > 0) { 2355 /* Decode up to temptative start point */ 2356 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2357 goto fail; 2358 DECODER_DECODE(input, skip_bytes, chars_decoded); 2359 if (chars_decoded <= chars_to_skip) { 2360 DECODER_GETSTATE(); 2361 if (dec_buffer_len == 0) { 2362 /* Before pos and no bytes buffered in decoder => OK */ 2363 cookie.dec_flags = dec_flags; 2364 chars_to_skip -= chars_decoded; 2365 break; 2366 } 2367 /* Skip back by buffered amount and reset heuristic */ 2368 skip_bytes -= dec_buffer_len; 2369 skip_back = 1; 2370 } 2371 else { 2372 /* We're too far ahead, skip back a bit */ 2373 skip_bytes -= skip_back; 2374 skip_back *= 2; 2375 } 2376 } 2377 if (skip_bytes <= 0) { 2378 skip_bytes = 0; 2379 if (_textiowrapper_decoder_setstate(self, &cookie) < 0) 2380 goto fail; 2381 } 2382 2383 /* Note our initial start point. */ 2384 cookie.start_pos += skip_bytes; 2385 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); 2386 if (chars_to_skip == 0) 2387 goto finally; 2388 2389 /* We should be close to the desired position. Now feed the decoder one 2390 * byte at a time until we reach the `chars_to_skip` target. 2391 * As we go, note the nearest "safe start point" before the current 2392 * location (a point where the decoder has nothing buffered, so seek() 2393 * can safely start from there and advance to this location). 2394 */ 2395 chars_decoded = 0; 2396 input = PyBytes_AS_STRING(next_input); 2397 input_end = input + PyBytes_GET_SIZE(next_input); 2398 input += skip_bytes; 2399 while (input < input_end) { 2400 Py_ssize_t n; 2401 2402 DECODER_DECODE(input, (Py_ssize_t)1, n); 2403 /* We got n chars for 1 byte */ 2404 chars_decoded += n; 2405 cookie.bytes_to_feed += 1; 2406 DECODER_GETSTATE(); 2407 2408 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { 2409 /* Decoder buffer is empty, so this is a safe start point. */ 2410 cookie.start_pos += cookie.bytes_to_feed; 2411 chars_to_skip -= chars_decoded; 2412 cookie.dec_flags = dec_flags; 2413 cookie.bytes_to_feed = 0; 2414 chars_decoded = 0; 2415 } 2416 if (chars_decoded >= chars_to_skip) 2417 break; 2418 input++; 2419 } 2420 if (input == input_end) { 2421 /* We didn't get enough decoded data; signal EOF to get more. */ 2422 PyObject *decoded = _PyObject_CallMethodId( 2423 self->decoder, &PyId_decode, "yi", "", /* final = */ 1); 2424 if (check_decoded(decoded) < 0) 2425 goto fail; 2426 chars_decoded += PyUnicode_GET_LENGTH(decoded); 2427 Py_DECREF(decoded); 2428 cookie.need_eof = 1; 2429 2430 if (chars_decoded < chars_to_skip) { 2431 PyErr_SetString(PyExc_IOError, 2432 "can't reconstruct logical file position"); 2433 goto fail; 2434 } 2435 } 2436 2437 finally: 2438 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state); 2439 Py_DECREF(saved_state); 2440 if (res == NULL) 2441 return NULL; 2442 Py_DECREF(res); 2443 2444 /* The returned cookie corresponds to the last safe start point. */ 2445 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); 2446 return textiowrapper_build_cookie(&cookie); 2447 2448 fail: 2449 if (saved_state) { 2450 PyObject *type, *value, *traceback; 2451 PyErr_Fetch(&type, &value, &traceback); 2452 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state); 2453 _PyErr_ChainExceptions(type, value, traceback); 2454 Py_DECREF(saved_state); 2455 Py_XDECREF(res); 2456 } 2457 return NULL; 2458 } 2459 2460 /*[clinic input] 2461 _io.TextIOWrapper.truncate 2462 pos: object = None 2463 / 2464 [clinic start generated code]*/ 2465 2466 static PyObject * 2467 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) 2468 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/ 2469 { 2470 PyObject *res; 2471 2472 CHECK_ATTACHED(self) 2473 2474 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL); 2475 if (res == NULL) 2476 return NULL; 2477 Py_DECREF(res); 2478 2479 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL); 2480 } 2481 2482 static PyObject * 2483 textiowrapper_repr(textio *self) 2484 { 2485 PyObject *nameobj, *modeobj, *res, *s; 2486 2487 CHECK_INITIALIZED(self); 2488 2489 res = PyUnicode_FromString("<_io.TextIOWrapper"); 2490 if (res == NULL) 2491 return NULL; 2492 2493 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name); 2494 if (nameobj == NULL) { 2495 if (PyErr_ExceptionMatches(PyExc_Exception)) 2496 PyErr_Clear(); 2497 else 2498 goto error; 2499 } 2500 else { 2501 s = PyUnicode_FromFormat(" name=%R", nameobj); 2502 Py_DECREF(nameobj); 2503 if (s == NULL) 2504 goto error; 2505 PyUnicode_AppendAndDel(&res, s); 2506 if (res == NULL) 2507 return NULL; 2508 } 2509 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode); 2510 if (modeobj == NULL) { 2511 if (PyErr_ExceptionMatches(PyExc_Exception)) 2512 PyErr_Clear(); 2513 else 2514 goto error; 2515 } 2516 else { 2517 s = PyUnicode_FromFormat(" mode=%R", modeobj); 2518 Py_DECREF(modeobj); 2519 if (s == NULL) 2520 goto error; 2521 PyUnicode_AppendAndDel(&res, s); 2522 if (res == NULL) 2523 return NULL; 2524 } 2525 s = PyUnicode_FromFormat("%U encoding=%R>", 2526 res, self->encoding); 2527 Py_DECREF(res); 2528 return s; 2529 error: 2530 Py_XDECREF(res); 2531 return NULL; 2532 } 2533 2534 2535 /* Inquiries */ 2536 2537 /*[clinic input] 2538 _io.TextIOWrapper.fileno 2539 [clinic start generated code]*/ 2540 2541 static PyObject * 2542 _io_TextIOWrapper_fileno_impl(textio *self) 2543 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/ 2544 { 2545 CHECK_ATTACHED(self); 2546 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL); 2547 } 2548 2549 /*[clinic input] 2550 _io.TextIOWrapper.seekable 2551 [clinic start generated code]*/ 2552 2553 static PyObject * 2554 _io_TextIOWrapper_seekable_impl(textio *self) 2555 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/ 2556 { 2557 CHECK_ATTACHED(self); 2558 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL); 2559 } 2560 2561 /*[clinic input] 2562 _io.TextIOWrapper.readable 2563 [clinic start generated code]*/ 2564 2565 static PyObject * 2566 _io_TextIOWrapper_readable_impl(textio *self) 2567 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/ 2568 { 2569 CHECK_ATTACHED(self); 2570 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL); 2571 } 2572 2573 /*[clinic input] 2574 _io.TextIOWrapper.writable 2575 [clinic start generated code]*/ 2576 2577 static PyObject * 2578 _io_TextIOWrapper_writable_impl(textio *self) 2579 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/ 2580 { 2581 CHECK_ATTACHED(self); 2582 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL); 2583 } 2584 2585 /*[clinic input] 2586 _io.TextIOWrapper.isatty 2587 [clinic start generated code]*/ 2588 2589 static PyObject * 2590 _io_TextIOWrapper_isatty_impl(textio *self) 2591 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/ 2592 { 2593 CHECK_ATTACHED(self); 2594 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL); 2595 } 2596 2597 static PyObject * 2598 textiowrapper_getstate(textio *self, PyObject *args) 2599 { 2600 PyErr_Format(PyExc_TypeError, 2601 "cannot serialize '%s' object", Py_TYPE(self)->tp_name); 2602 return NULL; 2603 } 2604 2605 /*[clinic input] 2606 _io.TextIOWrapper.flush 2607 [clinic start generated code]*/ 2608 2609 static PyObject * 2610 _io_TextIOWrapper_flush_impl(textio *self) 2611 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/ 2612 { 2613 CHECK_ATTACHED(self); 2614 CHECK_CLOSED(self); 2615 self->telling = self->seekable; 2616 if (_textiowrapper_writeflush(self) < 0) 2617 return NULL; 2618 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL); 2619 } 2620 2621 /*[clinic input] 2622 _io.TextIOWrapper.close 2623 [clinic start generated code]*/ 2624 2625 static PyObject * 2626 _io_TextIOWrapper_close_impl(textio *self) 2627 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/ 2628 { 2629 PyObject *res; 2630 int r; 2631 CHECK_ATTACHED(self); 2632 2633 res = textiowrapper_closed_get(self, NULL); 2634 if (res == NULL) 2635 return NULL; 2636 r = PyObject_IsTrue(res); 2637 Py_DECREF(res); 2638 if (r < 0) 2639 return NULL; 2640 2641 if (r > 0) { 2642 Py_RETURN_NONE; /* stream already closed */ 2643 } 2644 else { 2645 PyObject *exc = NULL, *val, *tb; 2646 if (self->finalizing) { 2647 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self); 2648 if (res) 2649 Py_DECREF(res); 2650 else 2651 PyErr_Clear(); 2652 } 2653 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); 2654 if (res == NULL) 2655 PyErr_Fetch(&exc, &val, &tb); 2656 else 2657 Py_DECREF(res); 2658 2659 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL); 2660 if (exc != NULL) { 2661 _PyErr_ChainExceptions(exc, val, tb); 2662 Py_CLEAR(res); 2663 } 2664 return res; 2665 } 2666 } 2667 2668 static PyObject * 2669 textiowrapper_iternext(textio *self) 2670 { 2671 PyObject *line; 2672 2673 CHECK_ATTACHED(self); 2674 2675 self->telling = 0; 2676 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { 2677 /* Skip method call overhead for speed */ 2678 line = _textiowrapper_readline(self, -1); 2679 } 2680 else { 2681 line = PyObject_CallMethodObjArgs((PyObject *)self, 2682 _PyIO_str_readline, NULL); 2683 if (line && !PyUnicode_Check(line)) { 2684 PyErr_Format(PyExc_IOError, 2685 "readline() should have returned a str object, " 2686 "not '%.200s'", Py_TYPE(line)->tp_name); 2687 Py_DECREF(line); 2688 return NULL; 2689 } 2690 } 2691 2692 if (line == NULL || PyUnicode_READY(line) == -1) 2693 return NULL; 2694 2695 if (PyUnicode_GET_LENGTH(line) == 0) { 2696 /* Reached EOF or would have blocked */ 2697 Py_DECREF(line); 2698 Py_CLEAR(self->snapshot); 2699 self->telling = self->seekable; 2700 return NULL; 2701 } 2702 2703 return line; 2704 } 2705 2706 static PyObject * 2707 textiowrapper_name_get(textio *self, void *context) 2708 { 2709 CHECK_ATTACHED(self); 2710 return _PyObject_GetAttrId(self->buffer, &PyId_name); 2711 } 2712 2713 static PyObject * 2714 textiowrapper_closed_get(textio *self, void *context) 2715 { 2716 CHECK_ATTACHED(self); 2717 return PyObject_GetAttr(self->buffer, _PyIO_str_closed); 2718 } 2719 2720 static PyObject * 2721 textiowrapper_newlines_get(textio *self, void *context) 2722 { 2723 PyObject *res; 2724 CHECK_ATTACHED(self); 2725 if (self->decoder == NULL) 2726 Py_RETURN_NONE; 2727 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines); 2728 if (res == NULL) { 2729 if (PyErr_ExceptionMatches(PyExc_AttributeError)) { 2730 PyErr_Clear(); 2731 Py_RETURN_NONE; 2732 } 2733 else { 2734 return NULL; 2735 } 2736 } 2737 return res; 2738 } 2739 2740 static PyObject * 2741 textiowrapper_errors_get(textio *self, void *context) 2742 { 2743 CHECK_INITIALIZED(self); 2744 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors)); 2745 } 2746 2747 static PyObject * 2748 textiowrapper_chunk_size_get(textio *self, void *context) 2749 { 2750 CHECK_ATTACHED(self); 2751 return PyLong_FromSsize_t(self->chunk_size); 2752 } 2753 2754 static int 2755 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context) 2756 { 2757 Py_ssize_t n; 2758 CHECK_ATTACHED_INT(self); 2759 n = PyNumber_AsSsize_t(arg, PyExc_ValueError); 2760 if (n == -1 && PyErr_Occurred()) 2761 return -1; 2762 if (n <= 0) { 2763 PyErr_SetString(PyExc_ValueError, 2764 "a strictly positive integer is required"); 2765 return -1; 2766 } 2767 self->chunk_size = n; 2768 return 0; 2769 } 2770 2771 #include "clinic/textio.c.h" 2772 2773 static PyMethodDef incrementalnewlinedecoder_methods[] = { 2774 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF 2775 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF 2776 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF 2777 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF 2778 {NULL} 2779 }; 2780 2781 static PyGetSetDef incrementalnewlinedecoder_getset[] = { 2782 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL}, 2783 {NULL} 2784 }; 2785 2786 PyTypeObject PyIncrementalNewlineDecoder_Type = { 2787 PyVarObject_HEAD_INIT(NULL, 0) 2788 "_io.IncrementalNewlineDecoder", /*tp_name*/ 2789 sizeof(nldecoder_object), /*tp_basicsize*/ 2790 0, /*tp_itemsize*/ 2791 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/ 2792 0, /*tp_print*/ 2793 0, /*tp_getattr*/ 2794 0, /*tp_setattr*/ 2795 0, /*tp_compare */ 2796 0, /*tp_repr*/ 2797 0, /*tp_as_number*/ 2798 0, /*tp_as_sequence*/ 2799 0, /*tp_as_mapping*/ 2800 0, /*tp_hash */ 2801 0, /*tp_call*/ 2802 0, /*tp_str*/ 2803 0, /*tp_getattro*/ 2804 0, /*tp_setattro*/ 2805 0, /*tp_as_buffer*/ 2806 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 2807 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */ 2808 0, /* tp_traverse */ 2809 0, /* tp_clear */ 2810 0, /* tp_richcompare */ 2811 0, /*tp_weaklistoffset*/ 2812 0, /* tp_iter */ 2813 0, /* tp_iternext */ 2814 incrementalnewlinedecoder_methods, /* tp_methods */ 2815 0, /* tp_members */ 2816 incrementalnewlinedecoder_getset, /* tp_getset */ 2817 0, /* tp_base */ 2818 0, /* tp_dict */ 2819 0, /* tp_descr_get */ 2820 0, /* tp_descr_set */ 2821 0, /* tp_dictoffset */ 2822 _io_IncrementalNewlineDecoder___init__, /* tp_init */ 2823 0, /* tp_alloc */ 2824 PyType_GenericNew, /* tp_new */ 2825 }; 2826 2827 2828 static PyMethodDef textiowrapper_methods[] = { 2829 _IO_TEXTIOWRAPPER_DETACH_METHODDEF 2830 _IO_TEXTIOWRAPPER_WRITE_METHODDEF 2831 _IO_TEXTIOWRAPPER_READ_METHODDEF 2832 _IO_TEXTIOWRAPPER_READLINE_METHODDEF 2833 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF 2834 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF 2835 2836 _IO_TEXTIOWRAPPER_FILENO_METHODDEF 2837 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF 2838 _IO_TEXTIOWRAPPER_READABLE_METHODDEF 2839 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF 2840 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF 2841 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS}, 2842 2843 _IO_TEXTIOWRAPPER_SEEK_METHODDEF 2844 _IO_TEXTIOWRAPPER_TELL_METHODDEF 2845 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF 2846 {NULL, NULL} 2847 }; 2848 2849 static PyMemberDef textiowrapper_members[] = { 2850 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY}, 2851 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY}, 2852 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY}, 2853 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0}, 2854 {NULL} 2855 }; 2856 2857 static PyGetSetDef textiowrapper_getset[] = { 2858 {"name", (getter)textiowrapper_name_get, NULL, NULL}, 2859 {"closed", (getter)textiowrapper_closed_get, NULL, NULL}, 2860 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, 2861 */ 2862 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL}, 2863 {"errors", (getter)textiowrapper_errors_get, NULL, NULL}, 2864 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get, 2865 (setter)textiowrapper_chunk_size_set, NULL}, 2866 {NULL} 2867 }; 2868 2869 PyTypeObject PyTextIOWrapper_Type = { 2870 PyVarObject_HEAD_INIT(NULL, 0) 2871 "_io.TextIOWrapper", /*tp_name*/ 2872 sizeof(textio), /*tp_basicsize*/ 2873 0, /*tp_itemsize*/ 2874 (destructor)textiowrapper_dealloc, /*tp_dealloc*/ 2875 0, /*tp_print*/ 2876 0, /*tp_getattr*/ 2877 0, /*tps_etattr*/ 2878 0, /*tp_compare */ 2879 (reprfunc)textiowrapper_repr,/*tp_repr*/ 2880 0, /*tp_as_number*/ 2881 0, /*tp_as_sequence*/ 2882 0, /*tp_as_mapping*/ 2883 0, /*tp_hash */ 2884 0, /*tp_call*/ 2885 0, /*tp_str*/ 2886 0, /*tp_getattro*/ 2887 0, /*tp_setattro*/ 2888 0, /*tp_as_buffer*/ 2889 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE 2890 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ 2891 _io_TextIOWrapper___init____doc__, /* tp_doc */ 2892 (traverseproc)textiowrapper_traverse, /* tp_traverse */ 2893 (inquiry)textiowrapper_clear, /* tp_clear */ 2894 0, /* tp_richcompare */ 2895 offsetof(textio, weakreflist), /*tp_weaklistoffset*/ 2896 0, /* tp_iter */ 2897 (iternextfunc)textiowrapper_iternext, /* tp_iternext */ 2898 textiowrapper_methods, /* tp_methods */ 2899 textiowrapper_members, /* tp_members */ 2900 textiowrapper_getset, /* tp_getset */ 2901 0, /* tp_base */ 2902 0, /* tp_dict */ 2903 0, /* tp_descr_get */ 2904 0, /* tp_descr_set */ 2905 offsetof(textio, dict), /*tp_dictoffset*/ 2906 _io_TextIOWrapper___init__, /* tp_init */ 2907 0, /* tp_alloc */ 2908 PyType_GenericNew, /* tp_new */ 2909 0, /* tp_free */ 2910 0, /* tp_is_gc */ 2911 0, /* tp_bases */ 2912 0, /* tp_mro */ 2913 0, /* tp_cache */ 2914 0, /* tp_subclasses */ 2915 0, /* tp_weaklist */ 2916 0, /* tp_del */ 2917 0, /* tp_version_tag */ 2918 0, /* tp_finalize */ 2919 }; 2920