1 /* ------------------------------------------------------------------------ 2 3 _codecs -- Provides access to the codec registry and the builtin 4 codecs. 5 6 This module should never be imported directly. The standard library 7 module "codecs" wraps this builtin module for use within Python. 8 9 The codec registry is accessible via: 10 11 register(search_function) -> None 12 13 lookup(encoding) -> CodecInfo object 14 15 The builtin Unicode codecs use the following interface: 16 17 <encoding>_encode(Unicode_object[,errors='strict']) -> 18 (string object, bytes consumed) 19 20 <encoding>_decode(char_buffer_obj[,errors='strict']) -> 21 (Unicode object, bytes consumed) 22 23 These <encoding>s are available: utf_8, unicode_escape, 24 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), 25 mbcs (on win32). 26 27 28 Written by Marc-Andre Lemburg (mal (at) lemburg.com). 29 30 Copyright (c) Corporation for National Research Initiatives. 31 32 ------------------------------------------------------------------------ */ 33 34 #define PY_SSIZE_T_CLEAN 35 #include "Python.h" 36 37 #ifdef MS_WINDOWS 38 #include <windows.h> 39 #endif 40 41 /*[clinic input] 42 module _codecs 43 [clinic start generated code]*/ 44 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/ 45 46 #include "clinic/_codecsmodule.c.h" 47 48 /* --- Registry ----------------------------------------------------------- */ 49 50 /*[clinic input] 51 _codecs.register 52 search_function: object 53 / 54 55 Register a codec search function. 56 57 Search functions are expected to take one argument, the encoding name in 58 all lower case letters, and either return None, or a tuple of functions 59 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object). 60 [clinic start generated code]*/ 61 62 static PyObject * 63 _codecs_register(PyObject *module, PyObject *search_function) 64 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/ 65 { 66 if (PyCodec_Register(search_function)) 67 return NULL; 68 69 Py_RETURN_NONE; 70 } 71 72 /*[clinic input] 73 _codecs.lookup 74 encoding: str 75 / 76 77 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object. 78 [clinic start generated code]*/ 79 80 static PyObject * 81 _codecs_lookup_impl(PyObject *module, const char *encoding) 82 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/ 83 { 84 return _PyCodec_Lookup(encoding); 85 } 86 87 /*[clinic input] 88 _codecs.encode 89 obj: object 90 encoding: str(c_default="NULL") = "utf-8" 91 errors: str(c_default="NULL") = "strict" 92 93 Encodes obj using the codec registered for encoding. 94 95 The default encoding is 'utf-8'. errors may be given to set a 96 different error handling scheme. Default is 'strict' meaning that encoding 97 errors raise a ValueError. Other possible values are 'ignore', 'replace' 98 and 'backslashreplace' as well as any other name registered with 99 codecs.register_error that can handle ValueErrors. 100 [clinic start generated code]*/ 101 102 static PyObject * 103 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding, 104 const char *errors) 105 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/ 106 { 107 if (encoding == NULL) 108 encoding = PyUnicode_GetDefaultEncoding(); 109 110 /* Encode via the codec registry */ 111 return PyCodec_Encode(obj, encoding, errors); 112 } 113 114 /*[clinic input] 115 _codecs.decode 116 obj: object 117 encoding: str(c_default="NULL") = "utf-8" 118 errors: str(c_default="NULL") = "strict" 119 120 Decodes obj using the codec registered for encoding. 121 122 Default encoding is 'utf-8'. errors may be given to set a 123 different error handling scheme. Default is 'strict' meaning that encoding 124 errors raise a ValueError. Other possible values are 'ignore', 'replace' 125 and 'backslashreplace' as well as any other name registered with 126 codecs.register_error that can handle ValueErrors. 127 [clinic start generated code]*/ 128 129 static PyObject * 130 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding, 131 const char *errors) 132 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/ 133 { 134 if (encoding == NULL) 135 encoding = PyUnicode_GetDefaultEncoding(); 136 137 /* Decode via the codec registry */ 138 return PyCodec_Decode(obj, encoding, errors); 139 } 140 141 /* --- Helpers ------------------------------------------------------------ */ 142 143 /*[clinic input] 144 _codecs._forget_codec 145 146 encoding: str 147 / 148 149 Purge the named codec from the internal codec lookup cache 150 [clinic start generated code]*/ 151 152 static PyObject * 153 _codecs__forget_codec_impl(PyObject *module, const char *encoding) 154 /*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/ 155 { 156 if (_PyCodec_Forget(encoding) < 0) { 157 return NULL; 158 }; 159 Py_RETURN_NONE; 160 } 161 162 static 163 PyObject *codec_tuple(PyObject *decoded, 164 Py_ssize_t len) 165 { 166 if (decoded == NULL) 167 return NULL; 168 return Py_BuildValue("Nn", decoded, len); 169 } 170 171 /* --- String codecs ------------------------------------------------------ */ 172 /*[clinic input] 173 _codecs.escape_decode 174 data: Py_buffer(accept={str, buffer}) 175 errors: str(accept={str, NoneType}) = NULL 176 / 177 [clinic start generated code]*/ 178 179 static PyObject * 180 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data, 181 const char *errors) 182 /*[clinic end generated code: output=505200ba8056979a input=0018edfd99db714d]*/ 183 { 184 PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len, 185 errors, 0, NULL); 186 return codec_tuple(decoded, data->len); 187 } 188 189 /*[clinic input] 190 _codecs.escape_encode 191 data: object(subclass_of='&PyBytes_Type') 192 errors: str(accept={str, NoneType}) = NULL 193 / 194 [clinic start generated code]*/ 195 196 static PyObject * 197 _codecs_escape_encode_impl(PyObject *module, PyObject *data, 198 const char *errors) 199 /*[clinic end generated code: output=4af1d477834bab34 input=da9ded00992f32f2]*/ 200 { 201 Py_ssize_t size; 202 Py_ssize_t newsize; 203 PyObject *v; 204 205 size = PyBytes_GET_SIZE(data); 206 if (size > PY_SSIZE_T_MAX / 4) { 207 PyErr_SetString(PyExc_OverflowError, 208 "string is too large to encode"); 209 return NULL; 210 } 211 newsize = 4*size; 212 v = PyBytes_FromStringAndSize(NULL, newsize); 213 214 if (v == NULL) { 215 return NULL; 216 } 217 else { 218 Py_ssize_t i; 219 char c; 220 char *p = PyBytes_AS_STRING(v); 221 222 for (i = 0; i < size; i++) { 223 /* There's at least enough room for a hex escape */ 224 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); 225 c = PyBytes_AS_STRING(data)[i]; 226 if (c == '\'' || c == '\\') 227 *p++ = '\\', *p++ = c; 228 else if (c == '\t') 229 *p++ = '\\', *p++ = 't'; 230 else if (c == '\n') 231 *p++ = '\\', *p++ = 'n'; 232 else if (c == '\r') 233 *p++ = '\\', *p++ = 'r'; 234 else if (c < ' ' || c >= 0x7f) { 235 *p++ = '\\'; 236 *p++ = 'x'; 237 *p++ = Py_hexdigits[(c & 0xf0) >> 4]; 238 *p++ = Py_hexdigits[c & 0xf]; 239 } 240 else 241 *p++ = c; 242 } 243 *p = '\0'; 244 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { 245 return NULL; 246 } 247 } 248 249 return codec_tuple(v, size); 250 } 251 252 /* --- Decoder ------------------------------------------------------------ */ 253 /*[clinic input] 254 _codecs.unicode_internal_decode 255 obj: object 256 errors: str(accept={str, NoneType}) = NULL 257 / 258 [clinic start generated code]*/ 259 260 static PyObject * 261 _codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj, 262 const char *errors) 263 /*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/ 264 { 265 if (PyUnicode_Check(obj)) { 266 if (PyUnicode_READY(obj) < 0) 267 return NULL; 268 Py_INCREF(obj); 269 return codec_tuple(obj, PyUnicode_GET_LENGTH(obj)); 270 } 271 else { 272 Py_buffer view; 273 PyObject *result; 274 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0) 275 return NULL; 276 277 result = codec_tuple( 278 _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors), 279 view.len); 280 PyBuffer_Release(&view); 281 return result; 282 } 283 } 284 285 /*[clinic input] 286 _codecs.utf_7_decode 287 data: Py_buffer 288 errors: str(accept={str, NoneType}) = NULL 289 final: int(c_default="0") = False 290 / 291 [clinic start generated code]*/ 292 293 static PyObject * 294 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data, 295 const char *errors, int final) 296 /*[clinic end generated code: output=0cd3a944a32a4089 input=bc4d6247ecdb01e6]*/ 297 { 298 Py_ssize_t consumed = data->len; 299 PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len, 300 errors, 301 final ? NULL : &consumed); 302 return codec_tuple(decoded, consumed); 303 } 304 305 /*[clinic input] 306 _codecs.utf_8_decode 307 data: Py_buffer 308 errors: str(accept={str, NoneType}) = NULL 309 final: int(c_default="0") = False 310 / 311 [clinic start generated code]*/ 312 313 static PyObject * 314 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data, 315 const char *errors, int final) 316 /*[clinic end generated code: output=10f74dec8d9bb8bf input=39161d71e7422ee2]*/ 317 { 318 Py_ssize_t consumed = data->len; 319 PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len, 320 errors, 321 final ? NULL : &consumed); 322 return codec_tuple(decoded, consumed); 323 } 324 325 /*[clinic input] 326 _codecs.utf_16_decode 327 data: Py_buffer 328 errors: str(accept={str, NoneType}) = NULL 329 final: int(c_default="0") = False 330 / 331 [clinic start generated code]*/ 332 333 static PyObject * 334 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data, 335 const char *errors, int final) 336 /*[clinic end generated code: output=783b442abcbcc2d0 input=f3cf01d1461007ce]*/ 337 { 338 int byteorder = 0; 339 /* This is overwritten unless final is true. */ 340 Py_ssize_t consumed = data->len; 341 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 342 errors, &byteorder, 343 final ? NULL : &consumed); 344 return codec_tuple(decoded, consumed); 345 } 346 347 /*[clinic input] 348 _codecs.utf_16_le_decode 349 data: Py_buffer 350 errors: str(accept={str, NoneType}) = NULL 351 final: int(c_default="0") = False 352 / 353 [clinic start generated code]*/ 354 355 static PyObject * 356 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data, 357 const char *errors, int final) 358 /*[clinic end generated code: output=899b9e6364379dcd input=a77e3bf97335d94e]*/ 359 { 360 int byteorder = -1; 361 /* This is overwritten unless final is true. */ 362 Py_ssize_t consumed = data->len; 363 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 364 errors, &byteorder, 365 final ? NULL : &consumed); 366 return codec_tuple(decoded, consumed); 367 } 368 369 /*[clinic input] 370 _codecs.utf_16_be_decode 371 data: Py_buffer 372 errors: str(accept={str, NoneType}) = NULL 373 final: int(c_default="0") = False 374 / 375 [clinic start generated code]*/ 376 377 static PyObject * 378 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data, 379 const char *errors, int final) 380 /*[clinic end generated code: output=49f6465ea07669c8 input=606f69fae91b5563]*/ 381 { 382 int byteorder = 1; 383 /* This is overwritten unless final is true. */ 384 Py_ssize_t consumed = data->len; 385 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 386 errors, &byteorder, 387 final ? NULL : &consumed); 388 return codec_tuple(decoded, consumed); 389 } 390 391 /* This non-standard version also provides access to the byteorder 392 parameter of the builtin UTF-16 codec. 393 394 It returns a tuple (unicode, bytesread, byteorder) with byteorder 395 being the value in effect at the end of data. 396 397 */ 398 /*[clinic input] 399 _codecs.utf_16_ex_decode 400 data: Py_buffer 401 errors: str(accept={str, NoneType}) = NULL 402 byteorder: int = 0 403 final: int(c_default="0") = False 404 / 405 [clinic start generated code]*/ 406 407 static PyObject * 408 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data, 409 const char *errors, int byteorder, int final) 410 /*[clinic end generated code: output=0f385f251ecc1988 input=f6e7f697658c013e]*/ 411 { 412 /* This is overwritten unless final is true. */ 413 Py_ssize_t consumed = data->len; 414 415 PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, 416 errors, &byteorder, 417 final ? NULL : &consumed); 418 if (decoded == NULL) 419 return NULL; 420 return Py_BuildValue("Nni", decoded, consumed, byteorder); 421 } 422 423 /*[clinic input] 424 _codecs.utf_32_decode 425 data: Py_buffer 426 errors: str(accept={str, NoneType}) = NULL 427 final: int(c_default="0") = False 428 / 429 [clinic start generated code]*/ 430 431 static PyObject * 432 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data, 433 const char *errors, int final) 434 /*[clinic end generated code: output=2fc961807f7b145f input=86d4f41c6c2e763d]*/ 435 { 436 int byteorder = 0; 437 /* This is overwritten unless final is true. */ 438 Py_ssize_t consumed = data->len; 439 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 440 errors, &byteorder, 441 final ? NULL : &consumed); 442 return codec_tuple(decoded, consumed); 443 } 444 445 /*[clinic input] 446 _codecs.utf_32_le_decode 447 data: Py_buffer 448 errors: str(accept={str, NoneType}) = NULL 449 final: int(c_default="0") = False 450 / 451 [clinic start generated code]*/ 452 453 static PyObject * 454 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data, 455 const char *errors, int final) 456 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=d18b650772d188ba]*/ 457 { 458 int byteorder = -1; 459 /* This is overwritten unless final is true. */ 460 Py_ssize_t consumed = data->len; 461 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 462 errors, &byteorder, 463 final ? NULL : &consumed); 464 return codec_tuple(decoded, consumed); 465 } 466 467 /*[clinic input] 468 _codecs.utf_32_be_decode 469 data: Py_buffer 470 errors: str(accept={str, NoneType}) = NULL 471 final: int(c_default="0") = False 472 / 473 [clinic start generated code]*/ 474 475 static PyObject * 476 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data, 477 const char *errors, int final) 478 /*[clinic end generated code: output=ff82bae862c92c4e input=19c271b5d34926d8]*/ 479 { 480 int byteorder = 1; 481 /* This is overwritten unless final is true. */ 482 Py_ssize_t consumed = data->len; 483 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 484 errors, &byteorder, 485 final ? NULL : &consumed); 486 return codec_tuple(decoded, consumed); 487 } 488 489 /* This non-standard version also provides access to the byteorder 490 parameter of the builtin UTF-32 codec. 491 492 It returns a tuple (unicode, bytesread, byteorder) with byteorder 493 being the value in effect at the end of data. 494 495 */ 496 /*[clinic input] 497 _codecs.utf_32_ex_decode 498 data: Py_buffer 499 errors: str(accept={str, NoneType}) = NULL 500 byteorder: int = 0 501 final: int(c_default="0") = False 502 / 503 [clinic start generated code]*/ 504 505 static PyObject * 506 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data, 507 const char *errors, int byteorder, int final) 508 /*[clinic end generated code: output=6bfb177dceaf4848 input=4af3e6ccfe34a076]*/ 509 { 510 Py_ssize_t consumed = data->len; 511 PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, 512 errors, &byteorder, 513 final ? NULL : &consumed); 514 if (decoded == NULL) 515 return NULL; 516 return Py_BuildValue("Nni", decoded, consumed, byteorder); 517 } 518 519 /*[clinic input] 520 _codecs.unicode_escape_decode 521 data: Py_buffer(accept={str, buffer}) 522 errors: str(accept={str, NoneType}) = NULL 523 / 524 [clinic start generated code]*/ 525 526 static PyObject * 527 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, 528 const char *errors) 529 /*[clinic end generated code: output=3ca3c917176b82ab input=49fd27d06813a7f5]*/ 530 { 531 PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len, 532 errors); 533 return codec_tuple(decoded, data->len); 534 } 535 536 /*[clinic input] 537 _codecs.raw_unicode_escape_decode 538 data: Py_buffer(accept={str, buffer}) 539 errors: str(accept={str, NoneType}) = NULL 540 / 541 [clinic start generated code]*/ 542 543 static PyObject * 544 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, 545 const char *errors) 546 /*[clinic end generated code: output=c98eeb56028070a6 input=770903a211434ebc]*/ 547 { 548 PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len, 549 errors); 550 return codec_tuple(decoded, data->len); 551 } 552 553 /*[clinic input] 554 _codecs.latin_1_decode 555 data: Py_buffer 556 errors: str(accept={str, NoneType}) = NULL 557 / 558 [clinic start generated code]*/ 559 560 static PyObject * 561 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data, 562 const char *errors) 563 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=5cad0f1759c618ec]*/ 564 { 565 PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors); 566 return codec_tuple(decoded, data->len); 567 } 568 569 /*[clinic input] 570 _codecs.ascii_decode 571 data: Py_buffer 572 errors: str(accept={str, NoneType}) = NULL 573 / 574 [clinic start generated code]*/ 575 576 static PyObject * 577 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data, 578 const char *errors) 579 /*[clinic end generated code: output=2627d72058d42429 input=ad1106f64037bd16]*/ 580 { 581 PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors); 582 return codec_tuple(decoded, data->len); 583 } 584 585 /*[clinic input] 586 _codecs.charmap_decode 587 data: Py_buffer 588 errors: str(accept={str, NoneType}) = NULL 589 mapping: object = NULL 590 / 591 [clinic start generated code]*/ 592 593 static PyObject * 594 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, 595 const char *errors, PyObject *mapping) 596 /*[clinic end generated code: output=2c335b09778cf895 input=19712ca35c5a80e2]*/ 597 { 598 PyObject *decoded; 599 600 if (mapping == Py_None) 601 mapping = NULL; 602 603 decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors); 604 return codec_tuple(decoded, data->len); 605 } 606 607 #ifdef MS_WINDOWS 608 609 /*[clinic input] 610 _codecs.mbcs_decode 611 data: Py_buffer 612 errors: str(accept={str, NoneType}) = NULL 613 final: int(c_default="0") = False 614 / 615 [clinic start generated code]*/ 616 617 static PyObject * 618 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data, 619 const char *errors, int final) 620 /*[clinic end generated code: output=39b65b8598938c4b input=d492c1ca64f4fa8a]*/ 621 { 622 Py_ssize_t consumed = data->len; 623 PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len, 624 errors, final ? NULL : &consumed); 625 return codec_tuple(decoded, consumed); 626 } 627 628 /*[clinic input] 629 _codecs.oem_decode 630 data: Py_buffer 631 errors: str(accept={str, NoneType}) = NULL 632 final: int(c_default="0") = False 633 / 634 [clinic start generated code]*/ 635 636 static PyObject * 637 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data, 638 const char *errors, int final) 639 /*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/ 640 { 641 Py_ssize_t consumed = data->len; 642 PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, 643 data->buf, data->len, errors, final ? NULL : &consumed); 644 return codec_tuple(decoded, consumed); 645 } 646 647 /*[clinic input] 648 _codecs.code_page_decode 649 codepage: int 650 data: Py_buffer 651 errors: str(accept={str, NoneType}) = NULL 652 final: int(c_default="0") = False 653 / 654 [clinic start generated code]*/ 655 656 static PyObject * 657 _codecs_code_page_decode_impl(PyObject *module, int codepage, 658 Py_buffer *data, const char *errors, int final) 659 /*[clinic end generated code: output=53008ea967da3fff input=4f3152a304e21d51]*/ 660 { 661 Py_ssize_t consumed = data->len; 662 PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage, 663 data->buf, data->len, 664 errors, 665 final ? NULL : &consumed); 666 return codec_tuple(decoded, consumed); 667 } 668 669 #endif /* MS_WINDOWS */ 670 671 /* --- Encoder ------------------------------------------------------------ */ 672 673 /*[clinic input] 674 _codecs.readbuffer_encode 675 data: Py_buffer(accept={str, buffer}) 676 errors: str(accept={str, NoneType}) = NULL 677 / 678 [clinic start generated code]*/ 679 680 static PyObject * 681 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data, 682 const char *errors) 683 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=b7c322b89d4ab923]*/ 684 { 685 PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len); 686 return codec_tuple(result, data->len); 687 } 688 689 /*[clinic input] 690 _codecs.unicode_internal_encode 691 obj: object 692 errors: str(accept={str, NoneType}) = NULL 693 / 694 [clinic start generated code]*/ 695 696 static PyObject * 697 _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, 698 const char *errors) 699 /*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/ 700 { 701 if (PyErr_WarnEx(PyExc_DeprecationWarning, 702 "unicode_internal codec has been deprecated", 703 1)) 704 return NULL; 705 706 if (PyUnicode_Check(obj)) { 707 Py_UNICODE *u; 708 Py_ssize_t len, size; 709 710 if (PyUnicode_READY(obj) < 0) 711 return NULL; 712 713 u = PyUnicode_AsUnicodeAndSize(obj, &len); 714 if (u == NULL) 715 return NULL; 716 if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) 717 return PyErr_NoMemory(); 718 size = len * sizeof(Py_UNICODE); 719 return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), 720 PyUnicode_GET_LENGTH(obj)); 721 } 722 else { 723 Py_buffer view; 724 PyObject *result; 725 if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0) 726 return NULL; 727 result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len), 728 view.len); 729 PyBuffer_Release(&view); 730 return result; 731 } 732 } 733 734 /*[clinic input] 735 _codecs.utf_7_encode 736 str: unicode 737 errors: str(accept={str, NoneType}) = NULL 738 / 739 [clinic start generated code]*/ 740 741 static PyObject * 742 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str, 743 const char *errors) 744 /*[clinic end generated code: output=0feda21ffc921bc8 input=d1a47579e79cbe15]*/ 745 { 746 return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), 747 PyUnicode_GET_LENGTH(str)); 748 } 749 750 /*[clinic input] 751 _codecs.utf_8_encode 752 str: unicode 753 errors: str(accept={str, NoneType}) = NULL 754 / 755 [clinic start generated code]*/ 756 757 static PyObject * 758 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str, 759 const char *errors) 760 /*[clinic end generated code: output=02bf47332b9c796c input=42e3ba73c4392eef]*/ 761 { 762 return codec_tuple(_PyUnicode_AsUTF8String(str, errors), 763 PyUnicode_GET_LENGTH(str)); 764 } 765 766 /* This version provides access to the byteorder parameter of the 767 builtin UTF-16 codecs as optional third argument. It defaults to 0 768 which means: use the native byte order and prepend the data with a 769 BOM mark. 770 771 */ 772 773 /*[clinic input] 774 _codecs.utf_16_encode 775 str: unicode 776 errors: str(accept={str, NoneType}) = NULL 777 byteorder: int = 0 778 / 779 [clinic start generated code]*/ 780 781 static PyObject * 782 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str, 783 const char *errors, int byteorder) 784 /*[clinic end generated code: output=c654e13efa2e64e4 input=ff46416b04edb944]*/ 785 { 786 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), 787 PyUnicode_GET_LENGTH(str)); 788 } 789 790 /*[clinic input] 791 _codecs.utf_16_le_encode 792 str: unicode 793 errors: str(accept={str, NoneType}) = NULL 794 / 795 [clinic start generated code]*/ 796 797 static PyObject * 798 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str, 799 const char *errors) 800 /*[clinic end generated code: output=431b01e55f2d4995 input=cb385455ea8f2fe0]*/ 801 { 802 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), 803 PyUnicode_GET_LENGTH(str)); 804 } 805 806 /*[clinic input] 807 _codecs.utf_16_be_encode 808 str: unicode 809 errors: str(accept={str, NoneType}) = NULL 810 / 811 [clinic start generated code]*/ 812 813 static PyObject * 814 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, 815 const char *errors) 816 /*[clinic end generated code: output=96886a6fd54dcae3 input=9119997066bdaefd]*/ 817 { 818 return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), 819 PyUnicode_GET_LENGTH(str)); 820 } 821 822 /* This version provides access to the byteorder parameter of the 823 builtin UTF-32 codecs as optional third argument. It defaults to 0 824 which means: use the native byte order and prepend the data with a 825 BOM mark. 826 827 */ 828 829 /*[clinic input] 830 _codecs.utf_32_encode 831 str: unicode 832 errors: str(accept={str, NoneType}) = NULL 833 byteorder: int = 0 834 / 835 [clinic start generated code]*/ 836 837 static PyObject * 838 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str, 839 const char *errors, int byteorder) 840 /*[clinic end generated code: output=5c760da0c09a8b83 input=c5e77da82fbe5c2a]*/ 841 { 842 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), 843 PyUnicode_GET_LENGTH(str)); 844 } 845 846 /*[clinic input] 847 _codecs.utf_32_le_encode 848 str: unicode 849 errors: str(accept={str, NoneType}) = NULL 850 / 851 [clinic start generated code]*/ 852 853 static PyObject * 854 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str, 855 const char *errors) 856 /*[clinic end generated code: output=b65cd176de8e36d6 input=9993b25fe0877848]*/ 857 { 858 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), 859 PyUnicode_GET_LENGTH(str)); 860 } 861 862 /*[clinic input] 863 _codecs.utf_32_be_encode 864 str: unicode 865 errors: str(accept={str, NoneType}) = NULL 866 / 867 [clinic start generated code]*/ 868 869 static PyObject * 870 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str, 871 const char *errors) 872 /*[clinic end generated code: output=1d9e71a9358709e9 input=d3e0ccaa02920431]*/ 873 { 874 return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), 875 PyUnicode_GET_LENGTH(str)); 876 } 877 878 /*[clinic input] 879 _codecs.unicode_escape_encode 880 str: unicode 881 errors: str(accept={str, NoneType}) = NULL 882 / 883 [clinic start generated code]*/ 884 885 static PyObject * 886 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str, 887 const char *errors) 888 /*[clinic end generated code: output=66271b30bc4f7a3c input=65d9eefca65b455a]*/ 889 { 890 return codec_tuple(PyUnicode_AsUnicodeEscapeString(str), 891 PyUnicode_GET_LENGTH(str)); 892 } 893 894 /*[clinic input] 895 _codecs.raw_unicode_escape_encode 896 str: unicode 897 errors: str(accept={str, NoneType}) = NULL 898 / 899 [clinic start generated code]*/ 900 901 static PyObject * 902 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str, 903 const char *errors) 904 /*[clinic end generated code: output=a66a806ed01c830a input=5aa33e4a133391ab]*/ 905 { 906 return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), 907 PyUnicode_GET_LENGTH(str)); 908 } 909 910 /*[clinic input] 911 _codecs.latin_1_encode 912 str: unicode 913 errors: str(accept={str, NoneType}) = NULL 914 / 915 [clinic start generated code]*/ 916 917 static PyObject * 918 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str, 919 const char *errors) 920 /*[clinic end generated code: output=2c28c83a27884e08 input=30b11c9e49a65150]*/ 921 { 922 return codec_tuple(_PyUnicode_AsLatin1String(str, errors), 923 PyUnicode_GET_LENGTH(str)); 924 } 925 926 /*[clinic input] 927 _codecs.ascii_encode 928 str: unicode 929 errors: str(accept={str, NoneType}) = NULL 930 / 931 [clinic start generated code]*/ 932 933 static PyObject * 934 _codecs_ascii_encode_impl(PyObject *module, PyObject *str, 935 const char *errors) 936 /*[clinic end generated code: output=b5e035182d33befc input=843a1d268e6dfa8e]*/ 937 { 938 return codec_tuple(_PyUnicode_AsASCIIString(str, errors), 939 PyUnicode_GET_LENGTH(str)); 940 } 941 942 /*[clinic input] 943 _codecs.charmap_encode 944 str: unicode 945 errors: str(accept={str, NoneType}) = NULL 946 mapping: object = NULL 947 / 948 [clinic start generated code]*/ 949 950 static PyObject * 951 _codecs_charmap_encode_impl(PyObject *module, PyObject *str, 952 const char *errors, PyObject *mapping) 953 /*[clinic end generated code: output=047476f48495a9e9 input=0752cde07a6d6d00]*/ 954 { 955 if (mapping == Py_None) 956 mapping = NULL; 957 958 return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), 959 PyUnicode_GET_LENGTH(str)); 960 } 961 962 /*[clinic input] 963 _codecs.charmap_build 964 map: unicode 965 / 966 [clinic start generated code]*/ 967 968 static PyObject * 969 _codecs_charmap_build_impl(PyObject *module, PyObject *map) 970 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/ 971 { 972 return PyUnicode_BuildEncodingMap(map); 973 } 974 975 #ifdef MS_WINDOWS 976 977 /*[clinic input] 978 _codecs.mbcs_encode 979 str: unicode 980 errors: str(accept={str, NoneType}) = NULL 981 / 982 [clinic start generated code]*/ 983 984 static PyObject * 985 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors) 986 /*[clinic end generated code: output=76e2e170c966c080 input=de471e0815947553]*/ 987 { 988 return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), 989 PyUnicode_GET_LENGTH(str)); 990 } 991 992 /*[clinic input] 993 _codecs.oem_encode 994 str: unicode 995 errors: str(accept={str, NoneType}) = NULL 996 / 997 [clinic start generated code]*/ 998 999 static PyObject * 1000 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) 1001 /*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/ 1002 { 1003 return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), 1004 PyUnicode_GET_LENGTH(str)); 1005 } 1006 1007 /*[clinic input] 1008 _codecs.code_page_encode 1009 code_page: int 1010 str: unicode 1011 errors: str(accept={str, NoneType}) = NULL 1012 / 1013 [clinic start generated code]*/ 1014 1015 static PyObject * 1016 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, 1017 const char *errors) 1018 /*[clinic end generated code: output=45673f6085657a9e input=786421ae617d680b]*/ 1019 { 1020 return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors), 1021 PyUnicode_GET_LENGTH(str)); 1022 } 1023 1024 #endif /* MS_WINDOWS */ 1025 1026 /* --- Error handler registry --------------------------------------------- */ 1027 1028 /*[clinic input] 1029 _codecs.register_error 1030 errors: str 1031 handler: object 1032 / 1033 1034 Register the specified error handler under the name errors. 1035 1036 handler must be a callable object, that will be called with an exception 1037 instance containing information about the location of the encoding/decoding 1038 error and must return a (replacement, new position) tuple. 1039 [clinic start generated code]*/ 1040 1041 static PyObject * 1042 _codecs_register_error_impl(PyObject *module, const char *errors, 1043 PyObject *handler) 1044 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/ 1045 { 1046 if (PyCodec_RegisterError(errors, handler)) 1047 return NULL; 1048 Py_RETURN_NONE; 1049 } 1050 1051 /*[clinic input] 1052 _codecs.lookup_error 1053 name: str 1054 / 1055 1056 lookup_error(errors) -> handler 1057 1058 Return the error handler for the specified error handling name or raise a 1059 LookupError, if no handler exists under this name. 1060 [clinic start generated code]*/ 1061 1062 static PyObject * 1063 _codecs_lookup_error_impl(PyObject *module, const char *name) 1064 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/ 1065 { 1066 return PyCodec_LookupError(name); 1067 } 1068 1069 /* --- Module API --------------------------------------------------------- */ 1070 1071 static PyMethodDef _codecs_functions[] = { 1072 _CODECS_REGISTER_METHODDEF 1073 _CODECS_LOOKUP_METHODDEF 1074 _CODECS_ENCODE_METHODDEF 1075 _CODECS_DECODE_METHODDEF 1076 _CODECS_ESCAPE_ENCODE_METHODDEF 1077 _CODECS_ESCAPE_DECODE_METHODDEF 1078 _CODECS_UTF_8_ENCODE_METHODDEF 1079 _CODECS_UTF_8_DECODE_METHODDEF 1080 _CODECS_UTF_7_ENCODE_METHODDEF 1081 _CODECS_UTF_7_DECODE_METHODDEF 1082 _CODECS_UTF_16_ENCODE_METHODDEF 1083 _CODECS_UTF_16_LE_ENCODE_METHODDEF 1084 _CODECS_UTF_16_BE_ENCODE_METHODDEF 1085 _CODECS_UTF_16_DECODE_METHODDEF 1086 _CODECS_UTF_16_LE_DECODE_METHODDEF 1087 _CODECS_UTF_16_BE_DECODE_METHODDEF 1088 _CODECS_UTF_16_EX_DECODE_METHODDEF 1089 _CODECS_UTF_32_ENCODE_METHODDEF 1090 _CODECS_UTF_32_LE_ENCODE_METHODDEF 1091 _CODECS_UTF_32_BE_ENCODE_METHODDEF 1092 _CODECS_UTF_32_DECODE_METHODDEF 1093 _CODECS_UTF_32_LE_DECODE_METHODDEF 1094 _CODECS_UTF_32_BE_DECODE_METHODDEF 1095 _CODECS_UTF_32_EX_DECODE_METHODDEF 1096 _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF 1097 _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF 1098 _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF 1099 _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF 1100 _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF 1101 _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF 1102 _CODECS_LATIN_1_ENCODE_METHODDEF 1103 _CODECS_LATIN_1_DECODE_METHODDEF 1104 _CODECS_ASCII_ENCODE_METHODDEF 1105 _CODECS_ASCII_DECODE_METHODDEF 1106 _CODECS_CHARMAP_ENCODE_METHODDEF 1107 _CODECS_CHARMAP_DECODE_METHODDEF 1108 _CODECS_CHARMAP_BUILD_METHODDEF 1109 _CODECS_READBUFFER_ENCODE_METHODDEF 1110 _CODECS_MBCS_ENCODE_METHODDEF 1111 _CODECS_MBCS_DECODE_METHODDEF 1112 _CODECS_OEM_ENCODE_METHODDEF 1113 _CODECS_OEM_DECODE_METHODDEF 1114 _CODECS_CODE_PAGE_ENCODE_METHODDEF 1115 _CODECS_CODE_PAGE_DECODE_METHODDEF 1116 _CODECS_REGISTER_ERROR_METHODDEF 1117 _CODECS_LOOKUP_ERROR_METHODDEF 1118 _CODECS__FORGET_CODEC_METHODDEF 1119 {NULL, NULL} /* sentinel */ 1120 }; 1121 1122 static struct PyModuleDef codecsmodule = { 1123 PyModuleDef_HEAD_INIT, 1124 "_codecs", 1125 NULL, 1126 -1, 1127 _codecs_functions, 1128 NULL, 1129 NULL, 1130 NULL, 1131 NULL 1132 }; 1133 1134 PyMODINIT_FUNC 1135 PyInit__codecs(void) 1136 { 1137 return PyModule_Create(&codecsmodule); 1138 } 1139