1 /* ------------------------------------------------------------------------ 2 3 _codecs -- Provides access to the codec registry and the builtin 4 codecs. 5 6 This module should never be imported directly. The standard library 7 module "codecs" wraps this builtin module for use within Python. 8 9 The codec registry is accessible via: 10 11 register(search_function) -> None 12 13 lookup(encoding) -> CodecInfo object 14 15 The builtin Unicode codecs use the following interface: 16 17 <encoding>_encode(Unicode_object[,errors='strict']) -> 18 (string object, bytes consumed) 19 20 <encoding>_decode(char_buffer_obj[,errors='strict']) -> 21 (Unicode object, bytes consumed) 22 23 <encoding>_encode() interfaces also accept non-Unicode object as 24 input. The objects are then converted to Unicode using 25 PyUnicode_FromObject() prior to applying the conversion. 26 27 These <encoding>s are available: utf_8, unicode_escape, 28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), 29 mbcs (on win32). 30 31 32 Written by Marc-Andre Lemburg (mal (at) lemburg.com). 33 34 Copyright (c) Corporation for National Research Initiatives. 35 36 ------------------------------------------------------------------------ */ 37 38 #define PY_SSIZE_T_CLEAN 39 #include "Python.h" 40 41 /* --- Registry ----------------------------------------------------------- */ 42 43 PyDoc_STRVAR(register__doc__, 44 "register(search_function)\n\ 45 \n\ 46 Register a codec search function. Search functions are expected to take\n\ 47 one argument, the encoding name in all lower case letters, and return\n\ 48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\ 49 (or a CodecInfo object)."); 50 51 static 52 PyObject *codec_register(PyObject *self, PyObject *search_function) 53 { 54 if (PyCodec_Register(search_function)) 55 return NULL; 56 57 Py_RETURN_NONE; 58 } 59 60 PyDoc_STRVAR(lookup__doc__, 61 "lookup(encoding) -> CodecInfo\n\ 62 \n\ 63 Looks up a codec tuple in the Python codec registry and returns\n\ 64 a CodecInfo object."); 65 66 static 67 PyObject *codec_lookup(PyObject *self, PyObject *args) 68 { 69 char *encoding; 70 71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding)) 72 return NULL; 73 74 return _PyCodec_Lookup(encoding); 75 } 76 77 PyDoc_STRVAR(encode__doc__, 78 "encode(obj, [encoding[,errors]]) -> object\n\ 79 \n\ 80 Encodes obj using the codec registered for encoding. encoding defaults\n\ 81 to the default encoding. errors may be given to set a different error\n\ 82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 83 a ValueError. Other possible values are 'ignore', 'replace' and\n\ 84 'xmlcharrefreplace' as well as any other name registered with\n\ 85 codecs.register_error that can handle ValueErrors."); 86 87 static PyObject * 88 codec_encode(PyObject *self, PyObject *args) 89 { 90 const char *encoding = NULL; 91 const char *errors = NULL; 92 PyObject *v; 93 94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors)) 95 return NULL; 96 97 #ifdef Py_USING_UNICODE 98 if (encoding == NULL) 99 encoding = PyUnicode_GetDefaultEncoding(); 100 #else 101 if (encoding == NULL) { 102 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 103 return NULL; 104 } 105 #endif 106 107 /* Encode via the codec registry */ 108 return PyCodec_Encode(v, encoding, errors); 109 } 110 111 PyDoc_STRVAR(decode__doc__, 112 "decode(obj, [encoding[,errors]]) -> object\n\ 113 \n\ 114 Decodes obj using the codec registered for encoding. encoding defaults\n\ 115 to the default encoding. errors may be given to set a different error\n\ 116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 117 a ValueError. Other possible values are 'ignore' and 'replace'\n\ 118 as well as any other name registered with codecs.register_error that is\n\ 119 able to handle ValueErrors."); 120 121 static PyObject * 122 codec_decode(PyObject *self, PyObject *args) 123 { 124 const char *encoding = NULL; 125 const char *errors = NULL; 126 PyObject *v; 127 128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors)) 129 return NULL; 130 131 #ifdef Py_USING_UNICODE 132 if (encoding == NULL) 133 encoding = PyUnicode_GetDefaultEncoding(); 134 #else 135 if (encoding == NULL) { 136 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 137 return NULL; 138 } 139 #endif 140 141 /* Decode via the codec registry */ 142 return PyCodec_Decode(v, encoding, errors); 143 } 144 145 /* --- Helpers ------------------------------------------------------------ */ 146 147 static 148 PyObject *codec_tuple(PyObject *unicode, 149 Py_ssize_t len) 150 { 151 PyObject *v; 152 if (unicode == NULL) 153 return NULL; 154 v = Py_BuildValue("On", unicode, len); 155 Py_DECREF(unicode); 156 return v; 157 } 158 159 /* --- String codecs ------------------------------------------------------ */ 160 static PyObject * 161 escape_decode(PyObject *self, 162 PyObject *args) 163 { 164 const char *errors = NULL; 165 const char *data; 166 Py_ssize_t size; 167 168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode", 169 &data, &size, &errors)) 170 return NULL; 171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL), 172 size); 173 } 174 175 static PyObject * 176 escape_encode(PyObject *self, 177 PyObject *args) 178 { 179 PyObject *str; 180 const char *errors = NULL; 181 char *buf; 182 Py_ssize_t consumed, len; 183 184 if (!PyArg_ParseTuple(args, "S|z:escape_encode", 185 &str, &errors)) 186 return NULL; 187 188 consumed = PyString_GET_SIZE(str); 189 str = PyString_Repr(str, 0); 190 if (!str) 191 return NULL; 192 193 /* The string will be quoted. Unquote, similar to unicode-escape. */ 194 buf = PyString_AS_STRING (str); 195 len = PyString_GET_SIZE (str); 196 memmove(buf, buf+1, len-2); 197 if (_PyString_Resize(&str, len-2) < 0) 198 return NULL; 199 200 return codec_tuple(str, consumed); 201 } 202 203 #ifdef Py_USING_UNICODE 204 /* --- Decoder ------------------------------------------------------------ */ 205 206 static PyObject * 207 unicode_internal_decode(PyObject *self, 208 PyObject *args) 209 { 210 PyObject *obj; 211 const char *errors = NULL; 212 const char *data; 213 Py_ssize_t size; 214 215 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode", 216 &obj, &errors)) 217 return NULL; 218 219 if (PyUnicode_Check(obj)) { 220 Py_INCREF(obj); 221 return codec_tuple(obj, PyUnicode_GET_SIZE(obj)); 222 } 223 else { 224 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) 225 return NULL; 226 227 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors), 228 size); 229 } 230 } 231 232 static PyObject * 233 utf_7_decode(PyObject *self, 234 PyObject *args) 235 { 236 Py_buffer pbuf; 237 const char *errors = NULL; 238 int final = 0; 239 Py_ssize_t consumed; 240 PyObject *decoded = NULL; 241 242 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode", 243 &pbuf, &errors, &final)) 244 return NULL; 245 consumed = pbuf.len; 246 247 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors, 248 final ? NULL : &consumed); 249 PyBuffer_Release(&pbuf); 250 if (decoded == NULL) 251 return NULL; 252 return codec_tuple(decoded, consumed); 253 } 254 255 static PyObject * 256 utf_8_decode(PyObject *self, 257 PyObject *args) 258 { 259 Py_buffer pbuf; 260 const char *errors = NULL; 261 int final = 0; 262 Py_ssize_t consumed; 263 PyObject *decoded = NULL; 264 265 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode", 266 &pbuf, &errors, &final)) 267 return NULL; 268 consumed = pbuf.len; 269 270 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors, 271 final ? NULL : &consumed); 272 PyBuffer_Release(&pbuf); 273 if (decoded == NULL) 274 return NULL; 275 return codec_tuple(decoded, consumed); 276 } 277 278 static PyObject * 279 utf_16_decode(PyObject *self, 280 PyObject *args) 281 { 282 Py_buffer pbuf; 283 const char *errors = NULL; 284 int byteorder = 0; 285 int final = 0; 286 Py_ssize_t consumed; 287 PyObject *decoded; 288 289 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode", 290 &pbuf, &errors, &final)) 291 return NULL; 292 consumed = pbuf.len; /* This is overwritten unless final is true. */ 293 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 294 &byteorder, final ? NULL : &consumed); 295 PyBuffer_Release(&pbuf); 296 if (decoded == NULL) 297 return NULL; 298 return codec_tuple(decoded, consumed); 299 } 300 301 static PyObject * 302 utf_16_le_decode(PyObject *self, 303 PyObject *args) 304 { 305 Py_buffer pbuf; 306 const char *errors = NULL; 307 int byteorder = -1; 308 int final = 0; 309 Py_ssize_t consumed; 310 PyObject *decoded = NULL; 311 312 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode", 313 &pbuf, &errors, &final)) 314 return NULL; 315 316 consumed = pbuf.len; /* This is overwritten unless final is true. */ 317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 318 &byteorder, final ? NULL : &consumed); 319 PyBuffer_Release(&pbuf); 320 if (decoded == NULL) 321 return NULL; 322 return codec_tuple(decoded, consumed); 323 } 324 325 static PyObject * 326 utf_16_be_decode(PyObject *self, 327 PyObject *args) 328 { 329 Py_buffer pbuf; 330 const char *errors = NULL; 331 int byteorder = 1; 332 int final = 0; 333 Py_ssize_t consumed; 334 PyObject *decoded = NULL; 335 336 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode", 337 &pbuf, &errors, &final)) 338 return NULL; 339 340 consumed = pbuf.len; /* This is overwritten unless final is true. */ 341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 342 &byteorder, final ? NULL : &consumed); 343 PyBuffer_Release(&pbuf); 344 if (decoded == NULL) 345 return NULL; 346 return codec_tuple(decoded, consumed); 347 } 348 349 /* This non-standard version also provides access to the byteorder 350 parameter of the builtin UTF-16 codec. 351 352 It returns a tuple (unicode, bytesread, byteorder) with byteorder 353 being the value in effect at the end of data. 354 355 */ 356 357 static PyObject * 358 utf_16_ex_decode(PyObject *self, 359 PyObject *args) 360 { 361 Py_buffer pbuf; 362 const char *errors = NULL; 363 int byteorder = 0; 364 PyObject *unicode, *tuple; 365 int final = 0; 366 Py_ssize_t consumed; 367 368 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode", 369 &pbuf, &errors, &byteorder, &final)) 370 return NULL; 371 consumed = pbuf.len; /* This is overwritten unless final is true. */ 372 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 373 &byteorder, final ? NULL : &consumed); 374 PyBuffer_Release(&pbuf); 375 if (unicode == NULL) 376 return NULL; 377 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); 378 Py_DECREF(unicode); 379 return tuple; 380 } 381 382 static PyObject * 383 utf_32_decode(PyObject *self, 384 PyObject *args) 385 { 386 Py_buffer pbuf; 387 const char *errors = NULL; 388 int byteorder = 0; 389 int final = 0; 390 Py_ssize_t consumed; 391 PyObject *decoded; 392 393 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode", 394 &pbuf, &errors, &final)) 395 return NULL; 396 consumed = pbuf.len; /* This is overwritten unless final is true. */ 397 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 398 &byteorder, final ? NULL : &consumed); 399 PyBuffer_Release(&pbuf); 400 if (decoded == NULL) 401 return NULL; 402 return codec_tuple(decoded, consumed); 403 } 404 405 static PyObject * 406 utf_32_le_decode(PyObject *self, 407 PyObject *args) 408 { 409 Py_buffer pbuf; 410 const char *errors = NULL; 411 int byteorder = -1; 412 int final = 0; 413 Py_ssize_t consumed; 414 PyObject *decoded; 415 416 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode", 417 &pbuf, &errors, &final)) 418 return NULL; 419 consumed = pbuf.len; /* This is overwritten unless final is true. */ 420 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 421 &byteorder, final ? NULL : &consumed); 422 PyBuffer_Release(&pbuf); 423 if (decoded == NULL) 424 return NULL; 425 return codec_tuple(decoded, consumed); 426 } 427 428 static PyObject * 429 utf_32_be_decode(PyObject *self, 430 PyObject *args) 431 { 432 Py_buffer pbuf; 433 const char *errors = NULL; 434 int byteorder = 1; 435 int final = 0; 436 Py_ssize_t consumed; 437 PyObject *decoded; 438 439 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode", 440 &pbuf, &errors, &final)) 441 return NULL; 442 consumed = pbuf.len; /* This is overwritten unless final is true. */ 443 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 444 &byteorder, final ? NULL : &consumed); 445 PyBuffer_Release(&pbuf); 446 if (decoded == NULL) 447 return NULL; 448 return codec_tuple(decoded, consumed); 449 } 450 451 /* This non-standard version also provides access to the byteorder 452 parameter of the builtin UTF-32 codec. 453 454 It returns a tuple (unicode, bytesread, byteorder) with byteorder 455 being the value in effect at the end of data. 456 457 */ 458 459 static PyObject * 460 utf_32_ex_decode(PyObject *self, 461 PyObject *args) 462 { 463 Py_buffer pbuf; 464 const char *errors = NULL; 465 int byteorder = 0; 466 PyObject *unicode, *tuple; 467 int final = 0; 468 Py_ssize_t consumed; 469 470 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode", 471 &pbuf, &errors, &byteorder, &final)) 472 return NULL; 473 consumed = pbuf.len; /* This is overwritten unless final is true. */ 474 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 475 &byteorder, final ? NULL : &consumed); 476 PyBuffer_Release(&pbuf); 477 if (unicode == NULL) 478 return NULL; 479 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); 480 Py_DECREF(unicode); 481 return tuple; 482 } 483 484 static PyObject * 485 unicode_escape_decode(PyObject *self, 486 PyObject *args) 487 { 488 Py_buffer pbuf; 489 const char *errors = NULL; 490 PyObject *unicode; 491 492 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode", 493 &pbuf, &errors)) 494 return NULL; 495 496 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors); 497 PyBuffer_Release(&pbuf); 498 return codec_tuple(unicode, pbuf.len); 499 } 500 501 static PyObject * 502 raw_unicode_escape_decode(PyObject *self, 503 PyObject *args) 504 { 505 Py_buffer pbuf; 506 const char *errors = NULL; 507 PyObject *unicode; 508 509 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode", 510 &pbuf, &errors)) 511 return NULL; 512 513 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors); 514 PyBuffer_Release(&pbuf); 515 return codec_tuple(unicode, pbuf.len); 516 } 517 518 static PyObject * 519 latin_1_decode(PyObject *self, 520 PyObject *args) 521 { 522 Py_buffer pbuf; 523 PyObject *unicode; 524 const char *errors = NULL; 525 526 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode", 527 &pbuf, &errors)) 528 return NULL; 529 530 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors); 531 PyBuffer_Release(&pbuf); 532 return codec_tuple(unicode, pbuf.len); 533 } 534 535 static PyObject * 536 ascii_decode(PyObject *self, 537 PyObject *args) 538 { 539 Py_buffer pbuf; 540 PyObject *unicode; 541 const char *errors = NULL; 542 543 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode", 544 &pbuf, &errors)) 545 return NULL; 546 547 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors); 548 PyBuffer_Release(&pbuf); 549 return codec_tuple(unicode, pbuf.len); 550 } 551 552 static PyObject * 553 charmap_decode(PyObject *self, 554 PyObject *args) 555 { 556 Py_buffer pbuf; 557 PyObject *unicode; 558 const char *errors = NULL; 559 PyObject *mapping = NULL; 560 561 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode", 562 &pbuf, &errors, &mapping)) 563 return NULL; 564 if (mapping == Py_None) 565 mapping = NULL; 566 567 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors); 568 PyBuffer_Release(&pbuf); 569 return codec_tuple(unicode, pbuf.len); 570 } 571 572 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 573 574 static PyObject * 575 mbcs_decode(PyObject *self, 576 PyObject *args) 577 { 578 Py_buffer pbuf; 579 const char *errors = NULL; 580 int final = 0; 581 Py_ssize_t consumed; 582 PyObject *decoded = NULL; 583 584 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode", 585 &pbuf, &errors, &final)) 586 return NULL; 587 consumed = pbuf.len; 588 589 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors, 590 final ? NULL : &consumed); 591 PyBuffer_Release(&pbuf); 592 if (decoded == NULL) 593 return NULL; 594 return codec_tuple(decoded, consumed); 595 } 596 597 #endif /* MS_WINDOWS */ 598 599 /* --- Encoder ------------------------------------------------------------ */ 600 601 static PyObject * 602 readbuffer_encode(PyObject *self, 603 PyObject *args) 604 { 605 const char *data; 606 Py_ssize_t size; 607 const char *errors = NULL; 608 609 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode", 610 &data, &size, &errors)) 611 return NULL; 612 613 return codec_tuple(PyString_FromStringAndSize(data, size), 614 size); 615 } 616 617 static PyObject * 618 charbuffer_encode(PyObject *self, 619 PyObject *args) 620 { 621 const char *data; 622 Py_ssize_t size; 623 const char *errors = NULL; 624 625 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode", 626 &data, &size, &errors)) 627 return NULL; 628 629 return codec_tuple(PyString_FromStringAndSize(data, size), 630 size); 631 } 632 633 static PyObject * 634 unicode_internal_encode(PyObject *self, 635 PyObject *args) 636 { 637 PyObject *obj; 638 const char *errors = NULL; 639 const char *data; 640 Py_ssize_t size; 641 642 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode", 643 &obj, &errors)) 644 return NULL; 645 646 if (PyUnicode_Check(obj)) { 647 data = PyUnicode_AS_DATA(obj); 648 size = PyUnicode_GET_DATA_SIZE(obj); 649 return codec_tuple(PyString_FromStringAndSize(data, size), 650 PyUnicode_GET_SIZE(obj)); 651 } 652 else { 653 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) 654 return NULL; 655 return codec_tuple(PyString_FromStringAndSize(data, size), 656 size); 657 } 658 } 659 660 static PyObject * 661 utf_7_encode(PyObject *self, 662 PyObject *args) 663 { 664 PyObject *str, *v; 665 const char *errors = NULL; 666 667 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode", 668 &str, &errors)) 669 return NULL; 670 671 str = PyUnicode_FromObject(str); 672 if (str == NULL) 673 return NULL; 674 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str), 675 PyUnicode_GET_SIZE(str), 676 0, 677 0, 678 errors), 679 PyUnicode_GET_SIZE(str)); 680 Py_DECREF(str); 681 return v; 682 } 683 684 static PyObject * 685 utf_8_encode(PyObject *self, 686 PyObject *args) 687 { 688 PyObject *str, *v; 689 const char *errors = NULL; 690 691 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode", 692 &str, &errors)) 693 return NULL; 694 695 str = PyUnicode_FromObject(str); 696 if (str == NULL) 697 return NULL; 698 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str), 699 PyUnicode_GET_SIZE(str), 700 errors), 701 PyUnicode_GET_SIZE(str)); 702 Py_DECREF(str); 703 return v; 704 } 705 706 /* This version provides access to the byteorder parameter of the 707 builtin UTF-16 codecs as optional third argument. It defaults to 0 708 which means: use the native byte order and prepend the data with a 709 BOM mark. 710 711 */ 712 713 static PyObject * 714 utf_16_encode(PyObject *self, 715 PyObject *args) 716 { 717 PyObject *str, *v; 718 const char *errors = NULL; 719 int byteorder = 0; 720 721 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode", 722 &str, &errors, &byteorder)) 723 return NULL; 724 725 str = PyUnicode_FromObject(str); 726 if (str == NULL) 727 return NULL; 728 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 729 PyUnicode_GET_SIZE(str), 730 errors, 731 byteorder), 732 PyUnicode_GET_SIZE(str)); 733 Py_DECREF(str); 734 return v; 735 } 736 737 static PyObject * 738 utf_16_le_encode(PyObject *self, 739 PyObject *args) 740 { 741 PyObject *str, *v; 742 const char *errors = NULL; 743 744 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode", 745 &str, &errors)) 746 return NULL; 747 748 str = PyUnicode_FromObject(str); 749 if (str == NULL) 750 return NULL; 751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 752 PyUnicode_GET_SIZE(str), 753 errors, 754 -1), 755 PyUnicode_GET_SIZE(str)); 756 Py_DECREF(str); 757 return v; 758 } 759 760 static PyObject * 761 utf_16_be_encode(PyObject *self, 762 PyObject *args) 763 { 764 PyObject *str, *v; 765 const char *errors = NULL; 766 767 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode", 768 &str, &errors)) 769 return NULL; 770 771 str = PyUnicode_FromObject(str); 772 if (str == NULL) 773 return NULL; 774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 775 PyUnicode_GET_SIZE(str), 776 errors, 777 +1), 778 PyUnicode_GET_SIZE(str)); 779 Py_DECREF(str); 780 return v; 781 } 782 783 /* This version provides access to the byteorder parameter of the 784 builtin UTF-32 codecs as optional third argument. It defaults to 0 785 which means: use the native byte order and prepend the data with a 786 BOM mark. 787 788 */ 789 790 static PyObject * 791 utf_32_encode(PyObject *self, 792 PyObject *args) 793 { 794 PyObject *str, *v; 795 const char *errors = NULL; 796 int byteorder = 0; 797 798 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode", 799 &str, &errors, &byteorder)) 800 return NULL; 801 802 str = PyUnicode_FromObject(str); 803 if (str == NULL) 804 return NULL; 805 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 806 PyUnicode_GET_SIZE(str), 807 errors, 808 byteorder), 809 PyUnicode_GET_SIZE(str)); 810 Py_DECREF(str); 811 return v; 812 } 813 814 static PyObject * 815 utf_32_le_encode(PyObject *self, 816 PyObject *args) 817 { 818 PyObject *str, *v; 819 const char *errors = NULL; 820 821 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode", 822 &str, &errors)) 823 return NULL; 824 825 str = PyUnicode_FromObject(str); 826 if (str == NULL) 827 return NULL; 828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 829 PyUnicode_GET_SIZE(str), 830 errors, 831 -1), 832 PyUnicode_GET_SIZE(str)); 833 Py_DECREF(str); 834 return v; 835 } 836 837 static PyObject * 838 utf_32_be_encode(PyObject *self, 839 PyObject *args) 840 { 841 PyObject *str, *v; 842 const char *errors = NULL; 843 844 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode", 845 &str, &errors)) 846 return NULL; 847 848 str = PyUnicode_FromObject(str); 849 if (str == NULL) 850 return NULL; 851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 852 PyUnicode_GET_SIZE(str), 853 errors, 854 +1), 855 PyUnicode_GET_SIZE(str)); 856 Py_DECREF(str); 857 return v; 858 } 859 860 static PyObject * 861 unicode_escape_encode(PyObject *self, 862 PyObject *args) 863 { 864 PyObject *str, *v; 865 const char *errors = NULL; 866 867 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode", 868 &str, &errors)) 869 return NULL; 870 871 str = PyUnicode_FromObject(str); 872 if (str == NULL) 873 return NULL; 874 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str), 875 PyUnicode_GET_SIZE(str)), 876 PyUnicode_GET_SIZE(str)); 877 Py_DECREF(str); 878 return v; 879 } 880 881 static PyObject * 882 raw_unicode_escape_encode(PyObject *self, 883 PyObject *args) 884 { 885 PyObject *str, *v; 886 const char *errors = NULL; 887 888 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode", 889 &str, &errors)) 890 return NULL; 891 892 str = PyUnicode_FromObject(str); 893 if (str == NULL) 894 return NULL; 895 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape( 896 PyUnicode_AS_UNICODE(str), 897 PyUnicode_GET_SIZE(str)), 898 PyUnicode_GET_SIZE(str)); 899 Py_DECREF(str); 900 return v; 901 } 902 903 static PyObject * 904 latin_1_encode(PyObject *self, 905 PyObject *args) 906 { 907 PyObject *str, *v; 908 const char *errors = NULL; 909 910 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode", 911 &str, &errors)) 912 return NULL; 913 914 str = PyUnicode_FromObject(str); 915 if (str == NULL) 916 return NULL; 917 v = codec_tuple(PyUnicode_EncodeLatin1( 918 PyUnicode_AS_UNICODE(str), 919 PyUnicode_GET_SIZE(str), 920 errors), 921 PyUnicode_GET_SIZE(str)); 922 Py_DECREF(str); 923 return v; 924 } 925 926 static PyObject * 927 ascii_encode(PyObject *self, 928 PyObject *args) 929 { 930 PyObject *str, *v; 931 const char *errors = NULL; 932 933 if (!PyArg_ParseTuple(args, "O|z:ascii_encode", 934 &str, &errors)) 935 return NULL; 936 937 str = PyUnicode_FromObject(str); 938 if (str == NULL) 939 return NULL; 940 v = codec_tuple(PyUnicode_EncodeASCII( 941 PyUnicode_AS_UNICODE(str), 942 PyUnicode_GET_SIZE(str), 943 errors), 944 PyUnicode_GET_SIZE(str)); 945 Py_DECREF(str); 946 return v; 947 } 948 949 static PyObject * 950 charmap_encode(PyObject *self, 951 PyObject *args) 952 { 953 PyObject *str, *v; 954 const char *errors = NULL; 955 PyObject *mapping = NULL; 956 957 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode", 958 &str, &errors, &mapping)) 959 return NULL; 960 if (mapping == Py_None) 961 mapping = NULL; 962 963 str = PyUnicode_FromObject(str); 964 if (str == NULL) 965 return NULL; 966 v = codec_tuple(PyUnicode_EncodeCharmap( 967 PyUnicode_AS_UNICODE(str), 968 PyUnicode_GET_SIZE(str), 969 mapping, 970 errors), 971 PyUnicode_GET_SIZE(str)); 972 Py_DECREF(str); 973 return v; 974 } 975 976 static PyObject* 977 charmap_build(PyObject *self, PyObject *args) 978 { 979 PyObject *map; 980 if (!PyArg_ParseTuple(args, "U:charmap_build", &map)) 981 return NULL; 982 return PyUnicode_BuildEncodingMap(map); 983 } 984 985 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 986 987 static PyObject * 988 mbcs_encode(PyObject *self, 989 PyObject *args) 990 { 991 PyObject *str, *v; 992 const char *errors = NULL; 993 994 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode", 995 &str, &errors)) 996 return NULL; 997 998 str = PyUnicode_FromObject(str); 999 if (str == NULL) 1000 return NULL; 1001 v = codec_tuple(PyUnicode_EncodeMBCS( 1002 PyUnicode_AS_UNICODE(str), 1003 PyUnicode_GET_SIZE(str), 1004 errors), 1005 PyUnicode_GET_SIZE(str)); 1006 Py_DECREF(str); 1007 return v; 1008 } 1009 1010 #endif /* MS_WINDOWS */ 1011 #endif /* Py_USING_UNICODE */ 1012 1013 /* --- Error handler registry --------------------------------------------- */ 1014 1015 PyDoc_STRVAR(register_error__doc__, 1016 "register_error(errors, handler)\n\ 1017 \n\ 1018 Register the specified error handler under the name\n\ 1019 errors. handler must be a callable object, that\n\ 1020 will be called with an exception instance containing\n\ 1021 information about the location of the encoding/decoding\n\ 1022 error and must return a (replacement, new position) tuple."); 1023 1024 static PyObject *register_error(PyObject *self, PyObject *args) 1025 { 1026 const char *name; 1027 PyObject *handler; 1028 1029 if (!PyArg_ParseTuple(args, "sO:register_error", 1030 &name, &handler)) 1031 return NULL; 1032 if (PyCodec_RegisterError(name, handler)) 1033 return NULL; 1034 Py_RETURN_NONE; 1035 } 1036 1037 PyDoc_STRVAR(lookup_error__doc__, 1038 "lookup_error(errors) -> handler\n\ 1039 \n\ 1040 Return the error handler for the specified error handling name\n\ 1041 or raise a LookupError, if no handler exists under this name."); 1042 1043 static PyObject *lookup_error(PyObject *self, PyObject *args) 1044 { 1045 const char *name; 1046 1047 if (!PyArg_ParseTuple(args, "s:lookup_error", 1048 &name)) 1049 return NULL; 1050 return PyCodec_LookupError(name); 1051 } 1052 1053 /* --- Module API --------------------------------------------------------- */ 1054 1055 static PyMethodDef _codecs_functions[] = { 1056 {"register", codec_register, METH_O, 1057 register__doc__}, 1058 {"lookup", codec_lookup, METH_VARARGS, 1059 lookup__doc__}, 1060 {"encode", codec_encode, METH_VARARGS, 1061 encode__doc__}, 1062 {"decode", codec_decode, METH_VARARGS, 1063 decode__doc__}, 1064 {"escape_encode", escape_encode, METH_VARARGS}, 1065 {"escape_decode", escape_decode, METH_VARARGS}, 1066 #ifdef Py_USING_UNICODE 1067 {"utf_8_encode", utf_8_encode, METH_VARARGS}, 1068 {"utf_8_decode", utf_8_decode, METH_VARARGS}, 1069 {"utf_7_encode", utf_7_encode, METH_VARARGS}, 1070 {"utf_7_decode", utf_7_decode, METH_VARARGS}, 1071 {"utf_16_encode", utf_16_encode, METH_VARARGS}, 1072 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS}, 1073 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS}, 1074 {"utf_16_decode", utf_16_decode, METH_VARARGS}, 1075 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS}, 1076 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS}, 1077 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS}, 1078 {"utf_32_encode", utf_32_encode, METH_VARARGS}, 1079 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS}, 1080 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS}, 1081 {"utf_32_decode", utf_32_decode, METH_VARARGS}, 1082 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS}, 1083 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS}, 1084 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS}, 1085 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS}, 1086 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS}, 1087 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS}, 1088 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS}, 1089 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS}, 1090 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS}, 1091 {"latin_1_encode", latin_1_encode, METH_VARARGS}, 1092 {"latin_1_decode", latin_1_decode, METH_VARARGS}, 1093 {"ascii_encode", ascii_encode, METH_VARARGS}, 1094 {"ascii_decode", ascii_decode, METH_VARARGS}, 1095 {"charmap_encode", charmap_encode, METH_VARARGS}, 1096 {"charmap_decode", charmap_decode, METH_VARARGS}, 1097 {"charmap_build", charmap_build, METH_VARARGS}, 1098 {"readbuffer_encode", readbuffer_encode, METH_VARARGS}, 1099 {"charbuffer_encode", charbuffer_encode, METH_VARARGS}, 1100 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 1101 {"mbcs_encode", mbcs_encode, METH_VARARGS}, 1102 {"mbcs_decode", mbcs_decode, METH_VARARGS}, 1103 #endif 1104 #endif /* Py_USING_UNICODE */ 1105 {"register_error", register_error, METH_VARARGS, 1106 register_error__doc__}, 1107 {"lookup_error", lookup_error, METH_VARARGS, 1108 lookup_error__doc__}, 1109 {NULL, NULL} /* sentinel */ 1110 }; 1111 1112 PyMODINIT_FUNC 1113 init_codecs(void) 1114 { 1115 Py_InitModule("_codecs", _codecs_functions); 1116 } 1117