Home | History | Annotate | Download | only in Python
      1 /* ------------------------------------------------------------------------
      2 
      3    Python Codec Registry and support functions
      4 
      5 Written by Marc-Andre Lemburg (mal (at) lemburg.com).
      6 
      7 Copyright (c) Corporation for National Research Initiatives.
      8 
      9    ------------------------------------------------------------------------ */
     10 
     11 #include "Python.h"
     12 #include <ctype.h>
     13 
     14 /* --- Codec Registry ----------------------------------------------------- */
     15 
/* Import the standard encodings package, which will register the first
   codec search function.

   This is done lazily so that the Unicode implementation does not slow
   down the startup of scripts that do not need it.

   ImportErrors are silently ignored by this function. Only one attempt
   is made.

*/
     26 
     27 static int _PyCodecRegistry_Init(void); /* Forward */
     28 
     29 int PyCodec_Register(PyObject *search_function)
     30 {
     31     PyInterpreterState *interp = PyThreadState_GET()->interp;
     32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
     33         goto onError;
     34     if (search_function == NULL) {
     35         PyErr_BadArgument();
     36         goto onError;
     37     }
     38     if (!PyCallable_Check(search_function)) {
     39         PyErr_SetString(PyExc_TypeError, "argument must be callable");
     40         goto onError;
     41     }
     42     return PyList_Append(interp->codec_search_path, search_function);
     43 
     44  onError:
     45     return -1;
     46 }
     47 
     48 /* Convert a string to a normalized Python string: all characters are
     49    converted to lower case, spaces are replaced with underscores. */
     50 
     51 static
     52 PyObject *normalizestring(const char *string)
     53 {
     54     register size_t i;
     55     size_t len = strlen(string);
     56     char *p;
     57     PyObject *v;
     58 
     59     if (len > PY_SSIZE_T_MAX) {
     60         PyErr_SetString(PyExc_OverflowError, "string is too large");
     61         return NULL;
     62     }
     63 
     64     v = PyString_FromStringAndSize(NULL, len);
     65     if (v == NULL)
     66         return NULL;
     67     p = PyString_AS_STRING(v);
     68     for (i = 0; i < len; i++) {
     69         register char ch = string[i];
     70         if (ch == ' ')
     71             ch = '-';
     72         else
     73             ch = tolower(Py_CHARMASK(ch));
     74         p[i] = ch;
     75     }
     76     return v;
     77 }
     78 
     79 /* Lookup the given encoding and return a tuple providing the codec
     80    facilities.
     81 
     82    The encoding string is looked up converted to all lower-case
     83    characters. This makes encodings looked up through this mechanism
     84    effectively case-insensitive.
     85 
     86    If no codec is found, a LookupError is set and NULL returned.
     87 
     88    As side effect, this tries to load the encodings package, if not
     89    yet done. This is part of the lazy load strategy for the encodings
     90    package.
     91 
     92 */
     93 
     94 PyObject *_PyCodec_Lookup(const char *encoding)
     95 {
     96     PyInterpreterState *interp;
     97     PyObject *result, *args = NULL, *v;
     98     Py_ssize_t i, len;
     99 
    100     if (encoding == NULL) {
    101         PyErr_BadArgument();
    102         goto onError;
    103     }
    104 
    105     interp = PyThreadState_GET()->interp;
    106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
    107         goto onError;
    108 
    109     /* Convert the encoding to a normalized Python string: all
    110        characters are converted to lower case, spaces and hyphens are
    111        replaced with underscores. */
    112     v = normalizestring(encoding);
    113     if (v == NULL)
    114         goto onError;
    115     PyString_InternInPlace(&v);
    116 
    117     /* First, try to lookup the name in the registry dictionary */
    118     result = PyDict_GetItem(interp->codec_search_cache, v);
    119     if (result != NULL) {
    120         Py_INCREF(result);
    121         Py_DECREF(v);
    122         return result;
    123     }
    124 
    125     /* Next, scan the search functions in order of registration */
    126     args = PyTuple_New(1);
    127     if (args == NULL)
    128         goto onError;
    129     PyTuple_SET_ITEM(args,0,v);
    130 
    131     len = PyList_Size(interp->codec_search_path);
    132     if (len < 0)
    133         goto onError;
    134     if (len == 0) {
    135         PyErr_SetString(PyExc_LookupError,
    136                         "no codec search functions registered: "
    137                         "can't find encoding");
    138         goto onError;
    139     }
    140 
    141     for (i = 0; i < len; i++) {
    142         PyObject *func;
    143 
    144         func = PyList_GetItem(interp->codec_search_path, i);
    145         if (func == NULL)
    146             goto onError;
    147         result = PyEval_CallObject(func, args);
    148         if (result == NULL)
    149             goto onError;
    150         if (result == Py_None) {
    151             Py_DECREF(result);
    152             continue;
    153         }
    154         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
    155             PyErr_SetString(PyExc_TypeError,
    156                             "codec search functions must return 4-tuples");
    157             Py_DECREF(result);
    158             goto onError;
    159         }
    160         break;
    161     }
    162     if (i == len) {
    163         /* XXX Perhaps we should cache misses too ? */
    164         PyErr_Format(PyExc_LookupError,
    165                      "unknown encoding: %s", encoding);
    166         goto onError;
    167     }
    168 
    169     /* Cache and return the result */
    170     PyDict_SetItem(interp->codec_search_cache, v, result);
    171     Py_DECREF(args);
    172     return result;
    173 
    174  onError:
    175     Py_XDECREF(args);
    176     return NULL;
    177 }
    178 
    179 static
    180 PyObject *args_tuple(PyObject *object,
    181                      const char *errors)
    182 {
    183     PyObject *args;
    184 
    185     args = PyTuple_New(1 + (errors != NULL));
    186     if (args == NULL)
    187         return NULL;
    188     Py_INCREF(object);
    189     PyTuple_SET_ITEM(args,0,object);
    190     if (errors) {
    191         PyObject *v;
    192 
    193         v = PyString_FromString(errors);
    194         if (v == NULL) {
    195             Py_DECREF(args);
    196             return NULL;
    197         }
    198         PyTuple_SET_ITEM(args, 1, v);
    199     }
    200     return args;
    201 }
    202 
    203 /* Helper function to get a codec item */
    204 
    205 static
    206 PyObject *codec_getitem(const char *encoding, int index)
    207 {
    208     PyObject *codecs;
    209     PyObject *v;
    210 
    211     codecs = _PyCodec_Lookup(encoding);
    212     if (codecs == NULL)
    213         return NULL;
    214     v = PyTuple_GET_ITEM(codecs, index);
    215     Py_DECREF(codecs);
    216     Py_INCREF(v);
    217     return v;
    218 }
    219 
    220 /* Helper function to create an incremental codec. */
    221 
    222 static
    223 PyObject *codec_getincrementalcodec(const char *encoding,
    224                                     const char *errors,
    225                                     const char *attrname)
    226 {
    227     PyObject *codecs, *ret, *inccodec;
    228 
    229     codecs = _PyCodec_Lookup(encoding);
    230     if (codecs == NULL)
    231         return NULL;
    232     inccodec = PyObject_GetAttrString(codecs, attrname);
    233     Py_DECREF(codecs);
    234     if (inccodec == NULL)
    235         return NULL;
    236     if (errors)
    237         ret = PyObject_CallFunction(inccodec, "s", errors);
    238     else
    239         ret = PyObject_CallFunction(inccodec, NULL);
    240     Py_DECREF(inccodec);
    241     return ret;
    242 }
    243 
    244 /* Helper function to create a stream codec. */
    245 
    246 static
    247 PyObject *codec_getstreamcodec(const char *encoding,
    248                                PyObject *stream,
    249                                const char *errors,
    250                                const int index)
    251 {
    252     PyObject *codecs, *streamcodec, *codeccls;
    253 
    254     codecs = _PyCodec_Lookup(encoding);
    255     if (codecs == NULL)
    256         return NULL;
    257 
    258     codeccls = PyTuple_GET_ITEM(codecs, index);
    259     if (errors != NULL)
    260         streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
    261     else
    262         streamcodec = PyObject_CallFunction(codeccls, "O", stream);
    263     Py_DECREF(codecs);
    264     return streamcodec;
    265 }
    266 
    267 /* Convenience APIs to query the Codec registry.
    268 
    269    All APIs return a codec object with incremented refcount.
    270 
    271  */
    272 
    273 PyObject *PyCodec_Encoder(const char *encoding)
    274 {
    275     return codec_getitem(encoding, 0);
    276 }
    277 
    278 PyObject *PyCodec_Decoder(const char *encoding)
    279 {
    280     return codec_getitem(encoding, 1);
    281 }
    282 
    283 PyObject *PyCodec_IncrementalEncoder(const char *encoding,
    284                                      const char *errors)
    285 {
    286     return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
    287 }
    288 
    289 PyObject *PyCodec_IncrementalDecoder(const char *encoding,
    290                                      const char *errors)
    291 {
    292     return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
    293 }
    294 
    295 PyObject *PyCodec_StreamReader(const char *encoding,
    296                                PyObject *stream,
    297                                const char *errors)
    298 {
    299     return codec_getstreamcodec(encoding, stream, errors, 2);
    300 }
    301 
    302 PyObject *PyCodec_StreamWriter(const char *encoding,
    303                                PyObject *stream,
    304                                const char *errors)
    305 {
    306     return codec_getstreamcodec(encoding, stream, errors, 3);
    307 }
    308 
    309 /* Encode an object (e.g. an Unicode object) using the given encoding
    310    and return the resulting encoded object (usually a Python string).
    311 
    312    errors is passed to the encoder factory as argument if non-NULL. */
    313 
    314 PyObject *PyCodec_Encode(PyObject *object,
    315                          const char *encoding,
    316                          const char *errors)
    317 {
    318     PyObject *encoder = NULL;
    319     PyObject *args = NULL, *result = NULL;
    320     PyObject *v;
    321 
    322     encoder = PyCodec_Encoder(encoding);
    323     if (encoder == NULL)
    324         goto onError;
    325 
    326     args = args_tuple(object, errors);
    327     if (args == NULL)
    328         goto onError;
    329 
    330     result = PyEval_CallObject(encoder,args);
    331     if (result == NULL)
    332         goto onError;
    333 
    334     if (!PyTuple_Check(result) ||
    335         PyTuple_GET_SIZE(result) != 2) {
    336         PyErr_SetString(PyExc_TypeError,
    337                         "encoder must return a tuple (object,integer)");
    338         goto onError;
    339     }
    340     v = PyTuple_GET_ITEM(result,0);
    341     Py_INCREF(v);
    342     /* We don't check or use the second (integer) entry. */
    343 
    344     Py_DECREF(args);
    345     Py_DECREF(encoder);
    346     Py_DECREF(result);
    347     return v;
    348 
    349  onError:
    350     Py_XDECREF(result);
    351     Py_XDECREF(args);
    352     Py_XDECREF(encoder);
    353     return NULL;
    354 }
    355 
    356 /* Decode an object (usually a Python string) using the given encoding
    357    and return an equivalent object (e.g. an Unicode object).
    358 
    359    errors is passed to the decoder factory as argument if non-NULL. */
    360 
    361 PyObject *PyCodec_Decode(PyObject *object,
    362                          const char *encoding,
    363                          const char *errors)
    364 {
    365     PyObject *decoder = NULL;
    366     PyObject *args = NULL, *result = NULL;
    367     PyObject *v;
    368 
    369     decoder = PyCodec_Decoder(encoding);
    370     if (decoder == NULL)
    371         goto onError;
    372 
    373     args = args_tuple(object, errors);
    374     if (args == NULL)
    375         goto onError;
    376 
    377     result = PyEval_CallObject(decoder,args);
    378     if (result == NULL)
    379         goto onError;
    380     if (!PyTuple_Check(result) ||
    381         PyTuple_GET_SIZE(result) != 2) {
    382         PyErr_SetString(PyExc_TypeError,
    383                         "decoder must return a tuple (object,integer)");
    384         goto onError;
    385     }
    386     v = PyTuple_GET_ITEM(result,0);
    387     Py_INCREF(v);
    388     /* We don't check or use the second (integer) entry. */
    389 
    390     Py_DECREF(args);
    391     Py_DECREF(decoder);
    392     Py_DECREF(result);
    393     return v;
    394 
    395  onError:
    396     Py_XDECREF(args);
    397     Py_XDECREF(decoder);
    398     Py_XDECREF(result);
    399     return NULL;
    400 }
    401 
    402 /* Register the error handling callback function error under the name
    403    name. This function will be called by the codec when it encounters
    404    an unencodable characters/undecodable bytes and doesn't know the
    405    callback name, when name is specified as the error parameter
    406    in the call to the encode/decode function.
    407    Return 0 on success, -1 on error */
    408 int PyCodec_RegisterError(const char *name, PyObject *error)
    409 {
    410     PyInterpreterState *interp = PyThreadState_GET()->interp;
    411     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
    412         return -1;
    413     if (!PyCallable_Check(error)) {
    414         PyErr_SetString(PyExc_TypeError, "handler must be callable");
    415         return -1;
    416     }
    417     return PyDict_SetItemString(interp->codec_error_registry,
    418                                 (char *)name, error);
    419 }
    420 
    421 /* Lookup the error handling callback function registered under the
    422    name error. As a special case NULL can be passed, in which case
    423    the error handling callback for strict encoding will be returned. */
    424 PyObject *PyCodec_LookupError(const char *name)
    425 {
    426     PyObject *handler = NULL;
    427 
    428     PyInterpreterState *interp = PyThreadState_GET()->interp;
    429     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
    430         return NULL;
    431 
    432     if (name==NULL)
    433         name = "strict";
    434     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
    435     if (!handler)
    436         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
    437     else
    438         Py_INCREF(handler);
    439     return handler;
    440 }
    441 
    442 static void wrong_exception_type(PyObject *exc)
    443 {
    444     PyObject *type = PyObject_GetAttrString(exc, "__class__");
    445     if (type != NULL) {
    446         PyObject *name = PyObject_GetAttrString(type, "__name__");
    447         Py_DECREF(type);
    448         if (name != NULL) {
    449             PyObject *string = PyObject_Str(name);
    450             Py_DECREF(name);
    451             if (string != NULL) {
    452                 PyErr_Format(PyExc_TypeError,
    453                     "don't know how to handle %.400s in error callback",
    454                     PyString_AS_STRING(string));
    455                 Py_DECREF(string);
    456             }
    457         }
    458     }
    459 }
    460 
    461 PyObject *PyCodec_StrictErrors(PyObject *exc)
    462 {
    463     if (PyExceptionInstance_Check(exc))
    464         PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    465     else
    466         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
    467     return NULL;
    468 }
    469 
    470 
    471 #ifdef Py_USING_UNICODE
    472 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
    473 {
    474     Py_ssize_t end;
    475     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
    476         if (PyUnicodeEncodeError_GetEnd(exc, &end))
    477             return NULL;
    478     }
    479     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
    480         if (PyUnicodeDecodeError_GetEnd(exc, &end))
    481             return NULL;
    482     }
    483     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
    484         if (PyUnicodeTranslateError_GetEnd(exc, &end))
    485             return NULL;
    486     }
    487     else {
    488         wrong_exception_type(exc);
    489         return NULL;
    490     }
    491     /* ouch: passing NULL, 0, pos gives None instead of u'' */
    492     return Py_BuildValue("(u#n)", &end, 0, end);
    493 }
    494 
    495 
    496 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
    497 {
    498     PyObject *restuple;
    499     Py_ssize_t start;
    500     Py_ssize_t end;
    501     Py_ssize_t i;
    502 
    503     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
    504         PyObject *res;
    505         Py_UNICODE *p;
    506         if (PyUnicodeEncodeError_GetStart(exc, &start))
    507             return NULL;
    508         if (PyUnicodeEncodeError_GetEnd(exc, &end))
    509             return NULL;
    510         res = PyUnicode_FromUnicode(NULL, end-start);
    511         if (res == NULL)
    512             return NULL;
    513         for (p = PyUnicode_AS_UNICODE(res), i = start;
    514             i<end; ++p, ++i)
    515             *p = '?';
    516         restuple = Py_BuildValue("(On)", res, end);
    517         Py_DECREF(res);
    518         return restuple;
    519     }
    520     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
    521         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
    522         if (PyUnicodeDecodeError_GetEnd(exc, &end))
    523             return NULL;
    524         return Py_BuildValue("(u#n)", &res, 1, end);
    525     }
    526     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
    527         PyObject *res;
    528         Py_UNICODE *p;
    529         if (PyUnicodeTranslateError_GetStart(exc, &start))
    530             return NULL;
    531         if (PyUnicodeTranslateError_GetEnd(exc, &end))
    532             return NULL;
    533         res = PyUnicode_FromUnicode(NULL, end-start);
    534         if (res == NULL)
    535             return NULL;
    536         for (p = PyUnicode_AS_UNICODE(res), i = start;
    537             i<end; ++p, ++i)
    538             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
    539         restuple = Py_BuildValue("(On)", res, end);
    540         Py_DECREF(res);
    541         return restuple;
    542     }
    543     else {
    544         wrong_exception_type(exc);
    545         return NULL;
    546     }
    547 }
    548 
    549 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
    550 {
    551     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
    552         PyObject *restuple;
    553         PyObject *object;
    554         Py_ssize_t start;
    555         Py_ssize_t end;
    556         PyObject *res;
    557         Py_UNICODE *p;
    558         Py_UNICODE *startp;
    559         Py_UNICODE *outp;
    560         int ressize;
    561         if (PyUnicodeEncodeError_GetStart(exc, &start))
    562             return NULL;
    563         if (PyUnicodeEncodeError_GetEnd(exc, &end))
    564             return NULL;
    565         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
    566             return NULL;
    567         startp = PyUnicode_AS_UNICODE(object);
    568         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
    569             if (*p<10)
    570                 ressize += 2+1+1;
    571             else if (*p<100)
    572                 ressize += 2+2+1;
    573             else if (*p<1000)
    574                 ressize += 2+3+1;
    575             else if (*p<10000)
    576                 ressize += 2+4+1;
    577 #ifndef Py_UNICODE_WIDE
    578             else
    579                 ressize += 2+5+1;
    580 #else
    581             else if (*p<100000)
    582                 ressize += 2+5+1;
    583             else if (*p<1000000)
    584                 ressize += 2+6+1;
    585             else
    586                 ressize += 2+7+1;
    587 #endif
    588         }
    589         /* allocate replacement */
    590         res = PyUnicode_FromUnicode(NULL, ressize);
    591         if (res == NULL) {
    592             Py_DECREF(object);
    593             return NULL;
    594         }
    595         /* generate replacement */
    596         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
    597             p < startp+end; ++p) {
    598             Py_UNICODE c = *p;
    599             int digits;
    600             int base;
    601             *outp++ = '&';
    602             *outp++ = '#';
    603             if (*p<10) {
    604                 digits = 1;
    605                 base = 1;
    606             }
    607             else if (*p<100) {
    608                 digits = 2;
    609                 base = 10;
    610             }
    611             else if (*p<1000) {
    612                 digits = 3;
    613                 base = 100;
    614             }
    615             else if (*p<10000) {
    616                 digits = 4;
    617                 base = 1000;
    618             }
    619 #ifndef Py_UNICODE_WIDE
    620             else {
    621                 digits = 5;
    622                 base = 10000;
    623             }
    624 #else
    625             else if (*p<100000) {
    626                 digits = 5;
    627                 base = 10000;
    628             }
    629             else if (*p<1000000) {
    630                 digits = 6;
    631                 base = 100000;
    632             }
    633             else {
    634                 digits = 7;
    635                 base = 1000000;
    636             }
    637 #endif
    638             while (digits-->0) {
    639                 *outp++ = '0' + c/base;
    640                 c %= base;
    641                 base /= 10;
    642             }
    643             *outp++ = ';';
    644         }
    645         restuple = Py_BuildValue("(On)", res, end);
    646         Py_DECREF(res);
    647         Py_DECREF(object);
    648         return restuple;
    649     }
    650     else {
    651         wrong_exception_type(exc);
    652         return NULL;
    653     }
    654 }
    655 
    656 static Py_UNICODE hexdigits[] = {
    657     '0', '1', '2', '3', '4', '5', '6', '7',
    658     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
    659 };
    660 
    661 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
    662 {
    663     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
    664         PyObject *restuple;
    665         PyObject *object;
    666         Py_ssize_t start;
    667         Py_ssize_t end;
    668         PyObject *res;
    669         Py_UNICODE *p;
    670         Py_UNICODE *startp;
    671         Py_UNICODE *outp;
    672         int ressize;
    673         if (PyUnicodeEncodeError_GetStart(exc, &start))
    674             return NULL;
    675         if (PyUnicodeEncodeError_GetEnd(exc, &end))
    676             return NULL;
    677         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
    678             return NULL;
    679         startp = PyUnicode_AS_UNICODE(object);
    680         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
    681 #ifdef Py_UNICODE_WIDE
    682             if (*p >= 0x00010000)
    683                 ressize += 1+1+8;
    684             else
    685 #endif
    686             if (*p >= 0x100) {
    687                 ressize += 1+1+4;
    688             }
    689             else
    690                 ressize += 1+1+2;
    691         }
    692         res = PyUnicode_FromUnicode(NULL, ressize);
    693         if (res==NULL)
    694             return NULL;
    695         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
    696             p < startp+end; ++p) {
    697             Py_UNICODE c = *p;
    698             *outp++ = '\\';
    699 #ifdef Py_UNICODE_WIDE
    700             if (c >= 0x00010000) {
    701                 *outp++ = 'U';
    702                 *outp++ = hexdigits[(c>>28)&0xf];
    703                 *outp++ = hexdigits[(c>>24)&0xf];
    704                 *outp++ = hexdigits[(c>>20)&0xf];
    705                 *outp++ = hexdigits[(c>>16)&0xf];
    706                 *outp++ = hexdigits[(c>>12)&0xf];
    707                 *outp++ = hexdigits[(c>>8)&0xf];
    708             }
    709             else
    710 #endif
    711             if (c >= 0x100) {
    712                 *outp++ = 'u';
    713                 *outp++ = hexdigits[(c>>12)&0xf];
    714                 *outp++ = hexdigits[(c>>8)&0xf];
    715             }
    716             else
    717                 *outp++ = 'x';
    718             *outp++ = hexdigits[(c>>4)&0xf];
    719             *outp++ = hexdigits[c&0xf];
    720         }
    721 
    722         restuple = Py_BuildValue("(On)", res, end);
    723         Py_DECREF(res);
    724         Py_DECREF(object);
    725         return restuple;
    726     }
    727     else {
    728         wrong_exception_type(exc);
    729         return NULL;
    730     }
    731 }
    732 #endif
    733 
    734 static PyObject *strict_errors(PyObject *self, PyObject *exc)
    735 {
    736     return PyCodec_StrictErrors(exc);
    737 }
    738 
    739 
    740 #ifdef Py_USING_UNICODE
    741 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
    742 {
    743     return PyCodec_IgnoreErrors(exc);
    744 }
    745 
    746 
    747 static PyObject *replace_errors(PyObject *self, PyObject *exc)
    748 {
    749     return PyCodec_ReplaceErrors(exc);
    750 }
    751 
    752 
    753 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
    754 {
    755     return PyCodec_XMLCharRefReplaceErrors(exc);
    756 }
    757 
    758 
    759 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
    760 {
    761     return PyCodec_BackslashReplaceErrors(exc);
    762 }
    763 #endif
    764 
    765 static int _PyCodecRegistry_Init(void)
    766 {
    767     static struct {
    768         char *name;
    769         PyMethodDef def;
    770     } methods[] =
    771     {
    772         {
    773             "strict",
    774             {
    775                 "strict_errors",
    776                 strict_errors,
    777                 METH_O,
    778                 PyDoc_STR("Implements the 'strict' error handling, which "
    779                           "raises a UnicodeError on coding errors.")
    780             }
    781         },
    782 #ifdef Py_USING_UNICODE
    783         {
    784             "ignore",
    785             {
    786                 "ignore_errors",
    787                 ignore_errors,
    788                 METH_O,
    789                 PyDoc_STR("Implements the 'ignore' error handling, which "
    790                           "ignores malformed data and continues.")
    791             }
    792         },
    793         {
    794             "replace",
    795             {
    796                 "replace_errors",
    797                 replace_errors,
    798                 METH_O,
    799                 PyDoc_STR("Implements the 'replace' error handling, which "
    800                           "replaces malformed data with a replacement marker.")
    801             }
    802         },
    803         {
    804             "xmlcharrefreplace",
    805             {
    806                 "xmlcharrefreplace_errors",
    807                 xmlcharrefreplace_errors,
    808                 METH_O,
    809                 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
    810                           "which replaces an unencodable character with the "
    811                           "appropriate XML character reference.")
    812             }
    813         },
    814         {
    815             "backslashreplace",
    816             {
    817                 "backslashreplace_errors",
    818                 backslashreplace_errors,
    819                 METH_O,
    820                 PyDoc_STR("Implements the 'backslashreplace' error handling, "
    821                           "which replaces an unencodable character with a "
    822                           "backslashed escape sequence.")
    823             }
    824         }
    825 #endif
    826     };
    827 
    828     PyInterpreterState *interp = PyThreadState_GET()->interp;
    829     PyObject *mod;
    830     unsigned i;
    831 
    832     if (interp->codec_search_path != NULL)
    833         return 0;
    834 
    835     interp->codec_search_path = PyList_New(0);
    836     interp->codec_search_cache = PyDict_New();
    837     interp->codec_error_registry = PyDict_New();
    838 
    839     if (interp->codec_error_registry) {
    840         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
    841             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
    842             int res;
    843             if (!func)
    844                 Py_FatalError("can't initialize codec error registry");
    845             res = PyCodec_RegisterError(methods[i].name, func);
    846             Py_DECREF(func);
    847             if (res)
    848                 Py_FatalError("can't initialize codec error registry");
    849         }
    850     }
    851 
    852     if (interp->codec_search_path == NULL ||
    853         interp->codec_search_cache == NULL ||
    854         interp->codec_error_registry == NULL)
    855         Py_FatalError("can't initialize codec registry");
    856 
    857     mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
    858     if (mod == NULL) {
    859         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
    860             /* Ignore ImportErrors... this is done so that
    861                distributions can disable the encodings package. Note
    862                that other errors are not masked, e.g. SystemErrors
    863                raised to inform the user of an error in the Python
    864                configuration are still reported back to the user. */
    865             PyErr_Clear();
    866             return 0;
    867         }
    868         return -1;
    869     }
    870     Py_DECREF(mod);
    871     return 0;
    872 }
    873