Home | History | Annotate | Download | only in Modules
      1 /* ------------------------------------------------------------------------
      2 
      3    _codecs -- Provides access to the codec registry and the builtin
      4               codecs.
      5 
      6    This module should never be imported directly. The standard library
      7    module "codecs" wraps this builtin module for use within Python.
      8 
      9    The codec registry is accessible via:
     10 
     11      register(search_function) -> None
     12 
     13      lookup(encoding) -> CodecInfo object
     14 
     15    The builtin Unicode codecs use the following interface:
     16 
     17      <encoding>_encode(Unicode_object[,errors='strict']) ->
     18         (string object, bytes consumed)
     19 
     20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
     21         (Unicode object, bytes consumed)
     22 
     23    <encoding>_encode() interfaces also accept non-Unicode object as
     24    input. The objects are then converted to Unicode using
     25    PyUnicode_FromObject() prior to applying the conversion.
     26 
     27    These <encoding>s are available: utf_8, unicode_escape,
     28    raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
     29    mbcs (on win32).
     30 
     31 
     32 Written by Marc-Andre Lemburg (mal (at) lemburg.com).
     33 
     34 Copyright (c) Corporation for National Research Initiatives.
     35 
     36    ------------------------------------------------------------------------ */
     37 
     38 #define PY_SSIZE_T_CLEAN
     39 #include "Python.h"
     40 
     41 /* --- Registry ----------------------------------------------------------- */
     42 
     43 PyDoc_STRVAR(register__doc__,
     44 "register(search_function)\n\
     45 \n\
     46 Register a codec search function. Search functions are expected to take\n\
     47 one argument, the encoding name in all lower case letters, and return\n\
     48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
     49 (or a CodecInfo object).");
     50 
     51 static
     52 PyObject *codec_register(PyObject *self, PyObject *search_function)
     53 {
     54     if (PyCodec_Register(search_function))
     55         return NULL;
     56 
     57     Py_RETURN_NONE;
     58 }
     59 
     60 PyDoc_STRVAR(lookup__doc__,
     61 "lookup(encoding) -> CodecInfo\n\
     62 \n\
     63 Looks up a codec tuple in the Python codec registry and returns\n\
     64 a CodecInfo object.");
     65 
     66 static
     67 PyObject *codec_lookup(PyObject *self, PyObject *args)
     68 {
     69     char *encoding;
     70 
     71     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
     72         return NULL;
     73 
     74     return _PyCodec_Lookup(encoding);
     75 }
     76 
     77 PyDoc_STRVAR(encode__doc__,
     78 "encode(obj, [encoding[,errors]]) -> object\n\
     79 \n\
     80 Encodes obj using the codec registered for encoding. encoding defaults\n\
     81 to the default encoding. errors may be given to set a different error\n\
     82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
     83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
     84 'xmlcharrefreplace' as well as any other name registered with\n\
     85 codecs.register_error that can handle ValueErrors.");
     86 
     87 static PyObject *
     88 codec_encode(PyObject *self, PyObject *args)
     89 {
     90     const char *encoding = NULL;
     91     const char *errors = NULL;
     92     PyObject *v;
     93 
     94     if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
     95         return NULL;
     96 
     97 #ifdef Py_USING_UNICODE
     98     if (encoding == NULL)
     99         encoding = PyUnicode_GetDefaultEncoding();
    100 #else
    101     if (encoding == NULL) {
    102         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    103         return NULL;
    104     }
    105 #endif
    106 
    107     /* Encode via the codec registry */
    108     return PyCodec_Encode(v, encoding, errors);
    109 }
    110 
    111 PyDoc_STRVAR(decode__doc__,
    112 "decode(obj, [encoding[,errors]]) -> object\n\
    113 \n\
    114 Decodes obj using the codec registered for encoding. encoding defaults\n\
    115 to the default encoding. errors may be given to set a different error\n\
    116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
    117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
    118 as well as any other name registered with codecs.register_error that is\n\
    119 able to handle ValueErrors.");
    120 
    121 static PyObject *
    122 codec_decode(PyObject *self, PyObject *args)
    123 {
    124     const char *encoding = NULL;
    125     const char *errors = NULL;
    126     PyObject *v;
    127 
    128     if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
    129         return NULL;
    130 
    131 #ifdef Py_USING_UNICODE
    132     if (encoding == NULL)
    133         encoding = PyUnicode_GetDefaultEncoding();
    134 #else
    135     if (encoding == NULL) {
    136         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    137         return NULL;
    138     }
    139 #endif
    140 
    141     /* Decode via the codec registry */
    142     return PyCodec_Decode(v, encoding, errors);
    143 }
    144 
    145 /* --- Helpers ------------------------------------------------------------ */
    146 
    147 static
    148 PyObject *codec_tuple(PyObject *unicode,
    149                       Py_ssize_t len)
    150 {
    151     PyObject *v;
    152     if (unicode == NULL)
    153         return NULL;
    154     v = Py_BuildValue("On", unicode, len);
    155     Py_DECREF(unicode);
    156     return v;
    157 }
    158 
    159 /* --- String codecs ------------------------------------------------------ */
    160 static PyObject *
    161 escape_decode(PyObject *self,
    162               PyObject *args)
    163 {
    164     const char *errors = NULL;
    165     const char *data;
    166     Py_ssize_t size;
    167 
    168     if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
    169                           &data, &size, &errors))
    170         return NULL;
    171     return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
    172                        size);
    173 }
    174 
    175 static PyObject *
    176 escape_encode(PyObject *self,
    177               PyObject *args)
    178 {
    179     PyObject *str;
    180     const char *errors = NULL;
    181     char *buf;
    182     Py_ssize_t consumed, len;
    183 
    184     if (!PyArg_ParseTuple(args, "S|z:escape_encode",
    185                           &str, &errors))
    186         return NULL;
    187 
    188     consumed = PyString_GET_SIZE(str);
    189     str = PyString_Repr(str, 0);
    190     if (!str)
    191         return NULL;
    192 
    193     /* The string will be quoted. Unquote, similar to unicode-escape. */
    194     buf = PyString_AS_STRING (str);
    195     len = PyString_GET_SIZE (str);
    196     memmove(buf, buf+1, len-2);
    197     if (_PyString_Resize(&str, len-2) < 0)
    198         return NULL;
    199 
    200     return codec_tuple(str, consumed);
    201 }
    202 
    203 #ifdef Py_USING_UNICODE
    204 /* --- Decoder ------------------------------------------------------------ */
    205 
    206 static PyObject *
    207 unicode_internal_decode(PyObject *self,
    208                         PyObject *args)
    209 {
    210     PyObject *obj;
    211     const char *errors = NULL;
    212     const char *data;
    213     Py_ssize_t size;
    214 
    215     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
    216                           &obj, &errors))
    217         return NULL;
    218 
    219     if (PyUnicode_Check(obj)) {
    220         Py_INCREF(obj);
    221         return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
    222     }
    223     else {
    224         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
    225             return NULL;
    226 
    227         return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
    228                            size);
    229     }
    230 }
    231 
    232 static PyObject *
    233 utf_7_decode(PyObject *self,
    234              PyObject *args)
    235 {
    236     Py_buffer pbuf;
    237     const char *errors = NULL;
    238     int final = 0;
    239     Py_ssize_t consumed;
    240     PyObject *decoded = NULL;
    241 
    242     if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
    243                           &pbuf, &errors, &final))
    244         return NULL;
    245     consumed = pbuf.len;
    246 
    247     decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
    248                                            final ? NULL : &consumed);
    249     PyBuffer_Release(&pbuf);
    250     if (decoded == NULL)
    251         return NULL;
    252     return codec_tuple(decoded, consumed);
    253 }
    254 
    255 static PyObject *
    256 utf_8_decode(PyObject *self,
    257             PyObject *args)
    258 {
    259     Py_buffer pbuf;
    260     const char *errors = NULL;
    261     int final = 0;
    262     Py_ssize_t consumed;
    263     PyObject *decoded = NULL;
    264 
    265     if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
    266                           &pbuf, &errors, &final))
    267         return NULL;
    268     consumed = pbuf.len;
    269 
    270     decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
    271                                            final ? NULL : &consumed);
    272     PyBuffer_Release(&pbuf);
    273     if (decoded == NULL)
    274         return NULL;
    275     return codec_tuple(decoded, consumed);
    276 }
    277 
    278 static PyObject *
    279 utf_16_decode(PyObject *self,
    280             PyObject *args)
    281 {
    282     Py_buffer pbuf;
    283     const char *errors = NULL;
    284     int byteorder = 0;
    285     int final = 0;
    286     Py_ssize_t consumed;
    287     PyObject *decoded;
    288 
    289     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
    290                           &pbuf, &errors, &final))
    291         return NULL;
    292     consumed = pbuf.len; /* This is overwritten unless final is true. */
    293     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
    294                                         &byteorder, final ? NULL : &consumed);
    295     PyBuffer_Release(&pbuf);
    296     if (decoded == NULL)
    297         return NULL;
    298     return codec_tuple(decoded, consumed);
    299 }
    300 
    301 static PyObject *
    302 utf_16_le_decode(PyObject *self,
    303                  PyObject *args)
    304 {
    305     Py_buffer pbuf;
    306     const char *errors = NULL;
    307     int byteorder = -1;
    308     int final = 0;
    309     Py_ssize_t consumed;
    310     PyObject *decoded = NULL;
    311 
    312     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
    313                           &pbuf, &errors, &final))
    314         return NULL;
    315 
    316     consumed = pbuf.len; /* This is overwritten unless final is true. */
    317     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
    318         &byteorder, final ? NULL : &consumed);
    319     PyBuffer_Release(&pbuf);
    320     if (decoded == NULL)
    321         return NULL;
    322     return codec_tuple(decoded, consumed);
    323 }
    324 
    325 static PyObject *
    326 utf_16_be_decode(PyObject *self,
    327                  PyObject *args)
    328 {
    329     Py_buffer pbuf;
    330     const char *errors = NULL;
    331     int byteorder = 1;
    332     int final = 0;
    333     Py_ssize_t consumed;
    334     PyObject *decoded = NULL;
    335 
    336     if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
    337                           &pbuf, &errors, &final))
    338         return NULL;
    339 
    340     consumed = pbuf.len; /* This is overwritten unless final is true. */
    341     decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
    342         &byteorder, final ? NULL : &consumed);
    343     PyBuffer_Release(&pbuf);
    344     if (decoded == NULL)
    345         return NULL;
    346     return codec_tuple(decoded, consumed);
    347 }
    348 
    349 /* This non-standard version also provides access to the byteorder
    350    parameter of the builtin UTF-16 codec.
    351 
    352    It returns a tuple (unicode, bytesread, byteorder) with byteorder
    353    being the value in effect at the end of data.
    354 
    355 */
    356 
    357 static PyObject *
    358 utf_16_ex_decode(PyObject *self,
    359                  PyObject *args)
    360 {
    361     Py_buffer pbuf;
    362     const char *errors = NULL;
    363     int byteorder = 0;
    364     PyObject *unicode, *tuple;
    365     int final = 0;
    366     Py_ssize_t consumed;
    367 
    368     if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
    369                           &pbuf, &errors, &byteorder, &final))
    370         return NULL;
    371     consumed = pbuf.len; /* This is overwritten unless final is true. */
    372     unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
    373                                         &byteorder, final ? NULL : &consumed);
    374     PyBuffer_Release(&pbuf);
    375     if (unicode == NULL)
    376         return NULL;
    377     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
    378     Py_DECREF(unicode);
    379     return tuple;
    380 }
    381 
    382 static PyObject *
    383 utf_32_decode(PyObject *self,
    384             PyObject *args)
    385 {
    386     Py_buffer pbuf;
    387     const char *errors = NULL;
    388     int byteorder = 0;
    389     int final = 0;
    390     Py_ssize_t consumed;
    391     PyObject *decoded;
    392 
    393     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
    394                           &pbuf, &errors, &final))
    395         return NULL;
    396     consumed = pbuf.len; /* This is overwritten unless final is true. */
    397     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
    398                                         &byteorder, final ? NULL : &consumed);
    399     PyBuffer_Release(&pbuf);
    400     if (decoded == NULL)
    401         return NULL;
    402     return codec_tuple(decoded, consumed);
    403 }
    404 
    405 static PyObject *
    406 utf_32_le_decode(PyObject *self,
    407                  PyObject *args)
    408 {
    409     Py_buffer pbuf;
    410     const char *errors = NULL;
    411     int byteorder = -1;
    412     int final = 0;
    413     Py_ssize_t consumed;
    414     PyObject *decoded;
    415 
    416     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
    417                           &pbuf, &errors, &final))
    418         return NULL;
    419     consumed = pbuf.len; /* This is overwritten unless final is true. */
    420     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
    421                                         &byteorder, final ? NULL : &consumed);
    422     PyBuffer_Release(&pbuf);
    423     if (decoded == NULL)
    424         return NULL;
    425     return codec_tuple(decoded, consumed);
    426 }
    427 
    428 static PyObject *
    429 utf_32_be_decode(PyObject *self,
    430                  PyObject *args)
    431 {
    432     Py_buffer pbuf;
    433     const char *errors = NULL;
    434     int byteorder = 1;
    435     int final = 0;
    436     Py_ssize_t consumed;
    437     PyObject *decoded;
    438 
    439     if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
    440                           &pbuf, &errors, &final))
    441         return NULL;
    442     consumed = pbuf.len; /* This is overwritten unless final is true. */
    443     decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
    444                                         &byteorder, final ? NULL : &consumed);
    445     PyBuffer_Release(&pbuf);
    446     if (decoded == NULL)
    447         return NULL;
    448     return codec_tuple(decoded, consumed);
    449 }
    450 
    451 /* This non-standard version also provides access to the byteorder
    452    parameter of the builtin UTF-32 codec.
    453 
    454    It returns a tuple (unicode, bytesread, byteorder) with byteorder
    455    being the value in effect at the end of data.
    456 
    457 */
    458 
    459 static PyObject *
    460 utf_32_ex_decode(PyObject *self,
    461                  PyObject *args)
    462 {
    463     Py_buffer pbuf;
    464     const char *errors = NULL;
    465     int byteorder = 0;
    466     PyObject *unicode, *tuple;
    467     int final = 0;
    468     Py_ssize_t consumed;
    469 
    470     if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
    471                           &pbuf, &errors, &byteorder, &final))
    472         return NULL;
    473     consumed = pbuf.len; /* This is overwritten unless final is true. */
    474     unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
    475                                         &byteorder, final ? NULL : &consumed);
    476     PyBuffer_Release(&pbuf);
    477     if (unicode == NULL)
    478         return NULL;
    479     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
    480     Py_DECREF(unicode);
    481     return tuple;
    482 }
    483 
    484 static PyObject *
    485 unicode_escape_decode(PyObject *self,
    486                      PyObject *args)
    487 {
    488     Py_buffer pbuf;
    489     const char *errors = NULL;
    490         PyObject *unicode;
    491 
    492     if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
    493                           &pbuf, &errors))
    494         return NULL;
    495 
    496     unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
    497     PyBuffer_Release(&pbuf);
    498     return codec_tuple(unicode, pbuf.len);
    499 }
    500 
    501 static PyObject *
    502 raw_unicode_escape_decode(PyObject *self,
    503                         PyObject *args)
    504 {
    505     Py_buffer pbuf;
    506     const char *errors = NULL;
    507     PyObject *unicode;
    508 
    509     if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
    510                           &pbuf, &errors))
    511         return NULL;
    512 
    513     unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
    514     PyBuffer_Release(&pbuf);
    515     return codec_tuple(unicode, pbuf.len);
    516 }
    517 
    518 static PyObject *
    519 latin_1_decode(PyObject *self,
    520                PyObject *args)
    521 {
    522     Py_buffer pbuf;
    523     PyObject *unicode;
    524     const char *errors = NULL;
    525 
    526     if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
    527                           &pbuf, &errors))
    528         return NULL;
    529 
    530     unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
    531     PyBuffer_Release(&pbuf);
    532     return codec_tuple(unicode, pbuf.len);
    533 }
    534 
    535 static PyObject *
    536 ascii_decode(PyObject *self,
    537              PyObject *args)
    538 {
    539     Py_buffer pbuf;
    540     PyObject *unicode;
    541     const char *errors = NULL;
    542 
    543     if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
    544                           &pbuf, &errors))
    545         return NULL;
    546 
    547     unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
    548     PyBuffer_Release(&pbuf);
    549     return codec_tuple(unicode, pbuf.len);
    550 }
    551 
    552 static PyObject *
    553 charmap_decode(PyObject *self,
    554                PyObject *args)
    555 {
    556     Py_buffer pbuf;
    557     PyObject *unicode;
    558     const char *errors = NULL;
    559     PyObject *mapping = NULL;
    560 
    561     if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
    562                           &pbuf, &errors, &mapping))
    563         return NULL;
    564     if (mapping == Py_None)
    565         mapping = NULL;
    566 
    567     unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
    568     PyBuffer_Release(&pbuf);
    569     return codec_tuple(unicode, pbuf.len);
    570 }
    571 
    572 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
    573 
    574 static PyObject *
    575 mbcs_decode(PyObject *self,
    576             PyObject *args)
    577 {
    578     Py_buffer pbuf;
    579     const char *errors = NULL;
    580     int final = 0;
    581     Py_ssize_t consumed;
    582     PyObject *decoded = NULL;
    583 
    584     if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
    585                           &pbuf, &errors, &final))
    586         return NULL;
    587     consumed = pbuf.len;
    588 
    589     decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
    590                                            final ? NULL : &consumed);
    591     PyBuffer_Release(&pbuf);
    592     if (decoded == NULL)
    593         return NULL;
    594     return codec_tuple(decoded, consumed);
    595 }
    596 
    597 #endif /* MS_WINDOWS */
    598 
    599 /* --- Encoder ------------------------------------------------------------ */
    600 
    601 static PyObject *
    602 readbuffer_encode(PyObject *self,
    603                   PyObject *args)
    604 {
    605     const char *data;
    606     Py_ssize_t size;
    607     const char *errors = NULL;
    608 
    609     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
    610                           &data, &size, &errors))
    611         return NULL;
    612 
    613     return codec_tuple(PyString_FromStringAndSize(data, size),
    614                        size);
    615 }
    616 
    617 static PyObject *
    618 charbuffer_encode(PyObject *self,
    619                   PyObject *args)
    620 {
    621     const char *data;
    622     Py_ssize_t size;
    623     const char *errors = NULL;
    624 
    625     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
    626                           &data, &size, &errors))
    627         return NULL;
    628 
    629     return codec_tuple(PyString_FromStringAndSize(data, size),
    630                        size);
    631 }
    632 
    633 static PyObject *
    634 unicode_internal_encode(PyObject *self,
    635                         PyObject *args)
    636 {
    637     PyObject *obj;
    638     const char *errors = NULL;
    639     const char *data;
    640     Py_ssize_t size;
    641 
    642     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
    643                           &obj, &errors))
    644         return NULL;
    645 
    646     if (PyUnicode_Check(obj)) {
    647         data = PyUnicode_AS_DATA(obj);
    648         size = PyUnicode_GET_DATA_SIZE(obj);
    649         return codec_tuple(PyString_FromStringAndSize(data, size),
    650                            PyUnicode_GET_SIZE(obj));
    651     }
    652     else {
    653         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
    654             return NULL;
    655         return codec_tuple(PyString_FromStringAndSize(data, size),
    656                            size);
    657     }
    658 }
    659 
    660 static PyObject *
    661 utf_7_encode(PyObject *self,
    662             PyObject *args)
    663 {
    664     PyObject *str, *v;
    665     const char *errors = NULL;
    666 
    667     if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
    668                           &str, &errors))
    669         return NULL;
    670 
    671     str = PyUnicode_FromObject(str);
    672     if (str == NULL)
    673         return NULL;
    674     v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
    675                                          PyUnicode_GET_SIZE(str),
    676                                          0,
    677                                          0,
    678                                          errors),
    679                     PyUnicode_GET_SIZE(str));
    680     Py_DECREF(str);
    681     return v;
    682 }
    683 
    684 static PyObject *
    685 utf_8_encode(PyObject *self,
    686             PyObject *args)
    687 {
    688     PyObject *str, *v;
    689     const char *errors = NULL;
    690 
    691     if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
    692                           &str, &errors))
    693         return NULL;
    694 
    695     str = PyUnicode_FromObject(str);
    696     if (str == NULL)
    697         return NULL;
    698     v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
    699                                          PyUnicode_GET_SIZE(str),
    700                                          errors),
    701                     PyUnicode_GET_SIZE(str));
    702     Py_DECREF(str);
    703     return v;
    704 }
    705 
    706 /* This version provides access to the byteorder parameter of the
    707    builtin UTF-16 codecs as optional third argument. It defaults to 0
    708    which means: use the native byte order and prepend the data with a
    709    BOM mark.
    710 
    711 */
    712 
    713 static PyObject *
    714 utf_16_encode(PyObject *self,
    715             PyObject *args)
    716 {
    717     PyObject *str, *v;
    718     const char *errors = NULL;
    719     int byteorder = 0;
    720 
    721     if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
    722                           &str, &errors, &byteorder))
    723         return NULL;
    724 
    725     str = PyUnicode_FromObject(str);
    726     if (str == NULL)
    727         return NULL;
    728     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
    729                                           PyUnicode_GET_SIZE(str),
    730                                           errors,
    731                                           byteorder),
    732                     PyUnicode_GET_SIZE(str));
    733     Py_DECREF(str);
    734     return v;
    735 }
    736 
    737 static PyObject *
    738 utf_16_le_encode(PyObject *self,
    739                  PyObject *args)
    740 {
    741     PyObject *str, *v;
    742     const char *errors = NULL;
    743 
    744     if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
    745                           &str, &errors))
    746         return NULL;
    747 
    748     str = PyUnicode_FromObject(str);
    749     if (str == NULL)
    750         return NULL;
    751     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
    752                                              PyUnicode_GET_SIZE(str),
    753                                              errors,
    754                                              -1),
    755                        PyUnicode_GET_SIZE(str));
    756     Py_DECREF(str);
    757     return v;
    758 }
    759 
    760 static PyObject *
    761 utf_16_be_encode(PyObject *self,
    762                  PyObject *args)
    763 {
    764     PyObject *str, *v;
    765     const char *errors = NULL;
    766 
    767     if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
    768                           &str, &errors))
    769         return NULL;
    770 
    771     str = PyUnicode_FromObject(str);
    772     if (str == NULL)
    773         return NULL;
    774     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
    775                                           PyUnicode_GET_SIZE(str),
    776                                           errors,
    777                                           +1),
    778                     PyUnicode_GET_SIZE(str));
    779     Py_DECREF(str);
    780     return v;
    781 }
    782 
    783 /* This version provides access to the byteorder parameter of the
    784    builtin UTF-32 codecs as optional third argument. It defaults to 0
    785    which means: use the native byte order and prepend the data with a
    786    BOM mark.
    787 
    788 */
    789 
    790 static PyObject *
    791 utf_32_encode(PyObject *self,
    792             PyObject *args)
    793 {
    794     PyObject *str, *v;
    795     const char *errors = NULL;
    796     int byteorder = 0;
    797 
    798     if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
    799                           &str, &errors, &byteorder))
    800         return NULL;
    801 
    802     str = PyUnicode_FromObject(str);
    803     if (str == NULL)
    804         return NULL;
    805     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
    806                                           PyUnicode_GET_SIZE(str),
    807                                           errors,
    808                                           byteorder),
    809                     PyUnicode_GET_SIZE(str));
    810     Py_DECREF(str);
    811     return v;
    812 }
    813 
    814 static PyObject *
    815 utf_32_le_encode(PyObject *self,
    816                  PyObject *args)
    817 {
    818     PyObject *str, *v;
    819     const char *errors = NULL;
    820 
    821     if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
    822                           &str, &errors))
    823         return NULL;
    824 
    825     str = PyUnicode_FromObject(str);
    826     if (str == NULL)
    827         return NULL;
    828     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
    829                                              PyUnicode_GET_SIZE(str),
    830                                              errors,
    831                                              -1),
    832                        PyUnicode_GET_SIZE(str));
    833     Py_DECREF(str);
    834     return v;
    835 }
    836 
    837 static PyObject *
    838 utf_32_be_encode(PyObject *self,
    839                  PyObject *args)
    840 {
    841     PyObject *str, *v;
    842     const char *errors = NULL;
    843 
    844     if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
    845                           &str, &errors))
    846         return NULL;
    847 
    848     str = PyUnicode_FromObject(str);
    849     if (str == NULL)
    850         return NULL;
    851     v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
    852                                           PyUnicode_GET_SIZE(str),
    853                                           errors,
    854                                           +1),
    855                     PyUnicode_GET_SIZE(str));
    856     Py_DECREF(str);
    857     return v;
    858 }
    859 
    860 static PyObject *
    861 unicode_escape_encode(PyObject *self,
    862                      PyObject *args)
    863 {
    864     PyObject *str, *v;
    865     const char *errors = NULL;
    866 
    867     if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
    868                           &str, &errors))
    869         return NULL;
    870 
    871     str = PyUnicode_FromObject(str);
    872     if (str == NULL)
    873         return NULL;
    874     v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
    875                                                   PyUnicode_GET_SIZE(str)),
    876                     PyUnicode_GET_SIZE(str));
    877     Py_DECREF(str);
    878     return v;
    879 }
    880 
    881 static PyObject *
    882 raw_unicode_escape_encode(PyObject *self,
    883                         PyObject *args)
    884 {
    885     PyObject *str, *v;
    886     const char *errors = NULL;
    887 
    888     if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
    889                           &str, &errors))
    890         return NULL;
    891 
    892     str = PyUnicode_FromObject(str);
    893     if (str == NULL)
    894         return NULL;
    895     v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
    896                                PyUnicode_AS_UNICODE(str),
    897                                PyUnicode_GET_SIZE(str)),
    898                     PyUnicode_GET_SIZE(str));
    899     Py_DECREF(str);
    900     return v;
    901 }
    902 
    903 static PyObject *
    904 latin_1_encode(PyObject *self,
    905                PyObject *args)
    906 {
    907     PyObject *str, *v;
    908     const char *errors = NULL;
    909 
    910     if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
    911                           &str, &errors))
    912         return NULL;
    913 
    914     str = PyUnicode_FromObject(str);
    915     if (str == NULL)
    916         return NULL;
    917     v = codec_tuple(PyUnicode_EncodeLatin1(
    918                                PyUnicode_AS_UNICODE(str),
    919                                PyUnicode_GET_SIZE(str),
    920                                errors),
    921                     PyUnicode_GET_SIZE(str));
    922     Py_DECREF(str);
    923     return v;
    924 }
    925 
    926 static PyObject *
    927 ascii_encode(PyObject *self,
    928              PyObject *args)
    929 {
    930     PyObject *str, *v;
    931     const char *errors = NULL;
    932 
    933     if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
    934                           &str, &errors))
    935         return NULL;
    936 
    937     str = PyUnicode_FromObject(str);
    938     if (str == NULL)
    939         return NULL;
    940     v = codec_tuple(PyUnicode_EncodeASCII(
    941                                PyUnicode_AS_UNICODE(str),
    942                                PyUnicode_GET_SIZE(str),
    943                                errors),
    944                     PyUnicode_GET_SIZE(str));
    945     Py_DECREF(str);
    946     return v;
    947 }
    948 
    949 static PyObject *
    950 charmap_encode(PyObject *self,
    951              PyObject *args)
    952 {
    953     PyObject *str, *v;
    954     const char *errors = NULL;
    955     PyObject *mapping = NULL;
    956 
    957     if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
    958                           &str, &errors, &mapping))
    959         return NULL;
    960     if (mapping == Py_None)
    961         mapping = NULL;
    962 
    963     str = PyUnicode_FromObject(str);
    964     if (str == NULL)
    965         return NULL;
    966     v = codec_tuple(PyUnicode_EncodeCharmap(
    967                                PyUnicode_AS_UNICODE(str),
    968                                PyUnicode_GET_SIZE(str),
    969                                mapping,
    970                                errors),
    971                     PyUnicode_GET_SIZE(str));
    972     Py_DECREF(str);
    973     return v;
    974 }
    975 
    976 static PyObject*
    977 charmap_build(PyObject *self, PyObject *args)
    978 {
    979     PyObject *map;
    980     if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
    981         return NULL;
    982     return PyUnicode_BuildEncodingMap(map);
    983 }
    984 
    985 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
    986 
    987 static PyObject *
    988 mbcs_encode(PyObject *self,
    989             PyObject *args)
    990 {
    991     PyObject *str, *v;
    992     const char *errors = NULL;
    993 
    994     if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
    995                           &str, &errors))
    996         return NULL;
    997 
    998     str = PyUnicode_FromObject(str);
    999     if (str == NULL)
   1000         return NULL;
   1001     v = codec_tuple(PyUnicode_EncodeMBCS(
   1002                                PyUnicode_AS_UNICODE(str),
   1003                                PyUnicode_GET_SIZE(str),
   1004                                errors),
   1005                     PyUnicode_GET_SIZE(str));
   1006     Py_DECREF(str);
   1007     return v;
   1008 }
   1009 
   1010 #endif /* MS_WINDOWS */
   1011 #endif /* Py_USING_UNICODE */
   1012 
   1013 /* --- Error handler registry --------------------------------------------- */
   1014 
   1015 PyDoc_STRVAR(register_error__doc__,
   1016 "register_error(errors, handler)\n\
   1017 \n\
   1018 Register the specified error handler under the name\n\
   1019 errors. handler must be a callable object, that\n\
   1020 will be called with an exception instance containing\n\
   1021 information about the location of the encoding/decoding\n\
   1022 error and must return a (replacement, new position) tuple.");
   1023 
   1024 static PyObject *register_error(PyObject *self, PyObject *args)
   1025 {
   1026     const char *name;
   1027     PyObject *handler;
   1028 
   1029     if (!PyArg_ParseTuple(args, "sO:register_error",
   1030                           &name, &handler))
   1031         return NULL;
   1032     if (PyCodec_RegisterError(name, handler))
   1033         return NULL;
   1034     Py_RETURN_NONE;
   1035 }
   1036 
   1037 PyDoc_STRVAR(lookup_error__doc__,
   1038 "lookup_error(errors) -> handler\n\
   1039 \n\
   1040 Return the error handler for the specified error handling name\n\
   1041 or raise a LookupError, if no handler exists under this name.");
   1042 
   1043 static PyObject *lookup_error(PyObject *self, PyObject *args)
   1044 {
   1045     const char *name;
   1046 
   1047     if (!PyArg_ParseTuple(args, "s:lookup_error",
   1048                           &name))
   1049         return NULL;
   1050     return PyCodec_LookupError(name);
   1051 }
   1052 
   1053 /* --- Module API --------------------------------------------------------- */
   1054 
   1055 static PyMethodDef _codecs_functions[] = {
   1056     {"register",                codec_register,                 METH_O,
   1057         register__doc__},
   1058     {"lookup",                  codec_lookup,                   METH_VARARGS,
   1059         lookup__doc__},
   1060     {"encode",                  codec_encode,                   METH_VARARGS,
   1061         encode__doc__},
   1062     {"decode",                  codec_decode,                   METH_VARARGS,
   1063         decode__doc__},
   1064     {"escape_encode",           escape_encode,                  METH_VARARGS},
   1065     {"escape_decode",           escape_decode,                  METH_VARARGS},
   1066 #ifdef Py_USING_UNICODE
   1067     {"utf_8_encode",            utf_8_encode,                   METH_VARARGS},
   1068     {"utf_8_decode",            utf_8_decode,                   METH_VARARGS},
   1069     {"utf_7_encode",            utf_7_encode,                   METH_VARARGS},
   1070     {"utf_7_decode",            utf_7_decode,                   METH_VARARGS},
   1071     {"utf_16_encode",           utf_16_encode,                  METH_VARARGS},
   1072     {"utf_16_le_encode",        utf_16_le_encode,               METH_VARARGS},
   1073     {"utf_16_be_encode",        utf_16_be_encode,               METH_VARARGS},
   1074     {"utf_16_decode",           utf_16_decode,                  METH_VARARGS},
   1075     {"utf_16_le_decode",        utf_16_le_decode,               METH_VARARGS},
   1076     {"utf_16_be_decode",        utf_16_be_decode,               METH_VARARGS},
   1077     {"utf_16_ex_decode",        utf_16_ex_decode,               METH_VARARGS},
   1078     {"utf_32_encode",           utf_32_encode,                  METH_VARARGS},
   1079     {"utf_32_le_encode",        utf_32_le_encode,               METH_VARARGS},
   1080     {"utf_32_be_encode",        utf_32_be_encode,               METH_VARARGS},
   1081     {"utf_32_decode",           utf_32_decode,                  METH_VARARGS},
   1082     {"utf_32_le_decode",        utf_32_le_decode,               METH_VARARGS},
   1083     {"utf_32_be_decode",        utf_32_be_decode,               METH_VARARGS},
   1084     {"utf_32_ex_decode",        utf_32_ex_decode,               METH_VARARGS},
   1085     {"unicode_escape_encode",   unicode_escape_encode,          METH_VARARGS},
   1086     {"unicode_escape_decode",   unicode_escape_decode,          METH_VARARGS},
   1087     {"unicode_internal_encode", unicode_internal_encode,        METH_VARARGS},
   1088     {"unicode_internal_decode", unicode_internal_decode,        METH_VARARGS},
   1089     {"raw_unicode_escape_encode", raw_unicode_escape_encode,    METH_VARARGS},
   1090     {"raw_unicode_escape_decode", raw_unicode_escape_decode,    METH_VARARGS},
   1091     {"latin_1_encode",          latin_1_encode,                 METH_VARARGS},
   1092     {"latin_1_decode",          latin_1_decode,                 METH_VARARGS},
   1093     {"ascii_encode",            ascii_encode,                   METH_VARARGS},
   1094     {"ascii_decode",            ascii_decode,                   METH_VARARGS},
   1095     {"charmap_encode",          charmap_encode,                 METH_VARARGS},
   1096     {"charmap_decode",          charmap_decode,                 METH_VARARGS},
   1097     {"charmap_build",           charmap_build,                  METH_VARARGS},
   1098     {"readbuffer_encode",       readbuffer_encode,              METH_VARARGS},
   1099     {"charbuffer_encode",       charbuffer_encode,              METH_VARARGS},
   1100 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
   1101     {"mbcs_encode",             mbcs_encode,                    METH_VARARGS},
   1102     {"mbcs_decode",             mbcs_decode,                    METH_VARARGS},
   1103 #endif
   1104 #endif /* Py_USING_UNICODE */
   1105     {"register_error",          register_error,                 METH_VARARGS,
   1106         register_error__doc__},
   1107     {"lookup_error",            lookup_error,                   METH_VARARGS,
   1108         lookup_error__doc__},
   1109     {NULL, NULL}                /* sentinel */
   1110 };
   1111 
   1112 PyMODINIT_FUNC
   1113 init_codecs(void)
   1114 {
   1115     Py_InitModule("_codecs", _codecs_functions);
   1116 }
   1117