Home | History | Annotate | Download | only in Include
      1 #ifndef Py_UNICODEOBJECT_H
      2 #define Py_UNICODEOBJECT_H
      3 
      4 #include <stdarg.h>
      5 
      6 /*
      7 
      8 Unicode implementation based on original code by Fredrik Lundh,
      9 modified by Marc-Andre Lemburg (mal (at) lemburg.com) according to the
     10 Unicode Integration Proposal. (See
     11 http://www.egenix.com/files/python/unicode-proposal.txt).
     12 
     13 Copyright (c) Corporation for National Research Initiatives.
     14 
     15 
     16  Original header:
     17  --------------------------------------------------------------------
     18 
     19  * Yet another Unicode string type for Python.  This type supports the
     20  * 16-bit Basic Multilingual Plane (BMP) only.
     21  *
     22  * Written by Fredrik Lundh, January 1999.
     23  *
     24  * Copyright (c) 1999 by Secret Labs AB.
     25  * Copyright (c) 1999 by Fredrik Lundh.
     26  *
     27  * fredrik (at) pythonware.com
     28  * http://www.pythonware.com
     29  *
     30  * --------------------------------------------------------------------
     31  * This Unicode String Type is
     32  *
     33  * Copyright (c) 1999 by Secret Labs AB
     34  * Copyright (c) 1999 by Fredrik Lundh
     35  *
     36  * By obtaining, using, and/or copying this software and/or its
     37  * associated documentation, you agree that you have read, understood,
     38  * and will comply with the following terms and conditions:
     39  *
     40  * Permission to use, copy, modify, and distribute this software and its
     41  * associated documentation for any purpose and without fee is hereby
     42  * granted, provided that the above copyright notice appears in all
     43  * copies, and that both that copyright notice and this permission notice
     44  * appear in supporting documentation, and that the name of Secret Labs
     45  * AB or the author not be used in advertising or publicity pertaining to
     46  * distribution of the software without specific, written prior
     47  * permission.
     48  *
     49  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
     50  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
     51  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
     52  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     53  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     54  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
     55  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     56  * -------------------------------------------------------------------- */
     57 
     58 #include <ctype.h>
     59 
     60 /* === Internal API ======================================================= */
     61 
     62 /* --- Internal Unicode Format -------------------------------------------- */
     63 
/* Python 3.x requires unicode */
#define Py_USING_UNICODE

/* SIZEOF_WCHAR_T is not defined by this header; it has to be provided
   before this point.  Fail the build early if it is missing. */
#ifndef SIZEOF_WCHAR_T
#error Must define SIZEOF_WCHAR_T
#endif

/* Size in bytes of one Py_UNICODE code unit, taken from SIZEOF_WCHAR_T. */
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T

/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
   Otherwise, Unicode strings are stored as UCS-2 (with limited support
   for UTF-16) */

#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif
     80 
     81 /* Set these flags if the platform has "wchar.h" and the
     82    wchar_t type is a 16-bit unsigned type */
     83 /* #define HAVE_WCHAR_H */
     84 /* #define HAVE_USABLE_WCHAR_T */
     85 
     86 /* Py_UNICODE was the native Unicode storage format (code unit) used by
     87    Python and represents a single Unicode element in the Unicode type.
     88    With PEP 393, Py_UNICODE is deprecated and replaced with a
     89    typedef to wchar_t. */
     90 
     91 #ifndef Py_LIMITED_API
     92 #define PY_UNICODE_TYPE wchar_t
     93 typedef wchar_t Py_UNICODE;
     94 #endif
     95 
     96 /* If the compiler provides a wchar_t type we try to support it
     97    through the interface functions PyUnicode_FromWideChar(),
     98    PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
     99 
    100 #ifdef HAVE_USABLE_WCHAR_T
    101 # ifndef HAVE_WCHAR_H
    102 #  define HAVE_WCHAR_H
    103 # endif
    104 #endif
    105 
    106 #ifdef HAVE_WCHAR_H
    107 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
    108 # ifdef _HAVE_BSDI
    109 #  include <time.h>
    110 # endif
    111 #  include <wchar.h>
    112 #endif
    113 
    114 /* Py_UCS4 and Py_UCS2 are typedefs for the respective
    115    unicode representations. */
    116 typedef uint32_t Py_UCS4;
    117 typedef uint16_t Py_UCS2;
    118 typedef uint8_t Py_UCS1;
    119 
    120 /* --- Internal Unicode Operations ---------------------------------------- */
    121 
    122 /* Since splitting on whitespace is an important use case, and
    123    whitespace in most situations is solely ASCII whitespace, we
    124    optimize for the common case by using a quick look-up table
    125    _Py_ascii_whitespace (see below) with an inlined check.
    126 
    127  */
    128 #ifndef Py_LIMITED_API
    129 #define Py_UNICODE_ISSPACE(ch) \
    130     ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
    131 
    132 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
    133 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
    134 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
    135 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
    136 
    137 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
    138 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
    139 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
    140 
    141 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
    142 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
    143 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
    144 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
    145 
    146 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
    147 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
    148 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
    149 
    150 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
    151 
    152 #define Py_UNICODE_ISALNUM(ch) \
    153        (Py_UNICODE_ISALPHA(ch) || \
    154     Py_UNICODE_ISDECIMAL(ch) || \
    155     Py_UNICODE_ISDIGIT(ch) || \
    156     Py_UNICODE_ISNUMERIC(ch))
    157 
    158 #define Py_UNICODE_COPY(target, source, length) \
    159     memcpy((target), (source), (length)*sizeof(Py_UNICODE))
    160 
    161 #define Py_UNICODE_FILL(target, value, length) \
    162     do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
    163         for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
    164     } while (0)
    165 
    166 /* macros to work with surrogates */
    167 #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
    168 #define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
    169 #define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
    170 /* Join two surrogate characters and return a single Py_UCS4 value. */
    171 #define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    172     (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
    173       ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
    174 /* high surrogate = top 10 bits added to D800 */
    175 #define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
    176 /* low surrogate = bottom 10 bits added to DC00 */
    177 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
    178 
    179 /* Check if substring matches at given offset.  The offset must be
    180    valid, and the substring must not be empty. */
    181 
    182 #define Py_UNICODE_MATCH(string, offset, substring) \
    183     ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
    184      ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
    185      !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
    186 
    187 #endif /* Py_LIMITED_API */
    188 
    189 #ifdef __cplusplus
    190 extern "C" {
    191 #endif
    192 
    193 /* --- Unicode Type ------------------------------------------------------- */
    194 
    195 #ifndef Py_LIMITED_API
    196 
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
   structure. state.ascii and state.compact are set, and the data
   immediately follows the structure. utf8_length and wstr_length can be
   found in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared with data.any and utf8_length = length
           if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    /* Bit-field block: 2 + 3 + 1 + 1 + 1 named bits plus 24 bits of
       padding, i.e. 32 bits in total. */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k). */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;
    336 
/* Non-ASCII strings allocated through PyUnicode_New use the
   PyCompactUnicodeObject structure. state.compact is set, and the data
   immediately follows the structure.  The structure begins with a
   PyASCIIObject, so a pointer to it may also be read as a PyASCIIObject. */
typedef struct {
    PyASCIIObject _base;        /* Common header: length, hash, state, wstr */
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code units in wstr; a surrogate
                                 * pair counts as two. */
} PyCompactUnicodeObject;
    348 
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
   PyUnicodeObject structure. The actual string data is initially in the wstr
   block, and copied into the data block using _PyUnicode_Ready.  Unlike the
   compact forms, the characters live in a separate buffer reached through
   the data union. */
typedef struct {
    PyCompactUnicodeObject _base;
    /* One pointer viewed through the element type that matches state.kind;
       `any` is the untyped view. */
    union {
        void *any;
        Py_UCS1 *latin1;
        Py_UCS2 *ucs2;
        Py_UCS4 *ucs4;
    } data;                     /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
    361 #endif
    362 
/* Type objects exported by the Unicode implementation.  PyUnicode_Type is
   the type that PyUnicode_CheckExact() compares against.
   NOTE(review): PyUnicodeIter_Type is presumably the type of the string
   iterator -- confirm against the implementation. */
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
    365 
    366 #define PyUnicode_Check(op) \
    367                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
    368 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
    369 
    370 /* Fast access macros */
    371 #ifndef Py_LIMITED_API
    372 
    373 #define PyUnicode_WSTR_LENGTH(op) \
    374     (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
    375      ((PyASCIIObject*)op)->length :                    \
    376      ((PyCompactUnicodeObject*)op)->wstr_length)
    377 
    378 /* Returns the deprecated Py_UNICODE representation's size in code units
    379    (this includes surrogate pairs as 2 units).
    380    If the Py_UNICODE representation is not available, it will be computed
    381    on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
    382 
    383 #define PyUnicode_GET_SIZE(op)                       \
    384     (assert(PyUnicode_Check(op)),                    \
    385      (((PyASCIIObject *)(op))->wstr) ?               \
    386       PyUnicode_WSTR_LENGTH(op) :                    \
    387       ((void)PyUnicode_AsUnicode((PyObject *)(op)),  \
    388        assert(((PyASCIIObject *)(op))->wstr),        \
    389        PyUnicode_WSTR_LENGTH(op)))
    390 
    391 #define PyUnicode_GET_DATA_SIZE(op) \
    392     (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
    393 
    394 /* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
    395    representation on demand.  Using this macro is very inefficient now,
    396    try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
    397    use PyUnicode_WRITE() and PyUnicode_READ(). */
    398 
    399 #define PyUnicode_AS_UNICODE(op) \
    400     (assert(PyUnicode_Check(op)), \
    401      (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
    402       PyUnicode_AsUnicode((PyObject *)(op)))
    403 
    404 #define PyUnicode_AS_DATA(op) \
    405     ((const char *)(PyUnicode_AS_UNICODE(op)))
    406 
    407 
    408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
    409 
/* Values for PyASCIIObject.state: */

/* Interning state, stored in the 2-bit state.interned field. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
    416 
    417 /* Return true if the string contains only ASCII characters, or 0 if not. The
    418    string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
    419    ready. */
    420 #define PyUnicode_IS_ASCII(op)                   \
    421     (assert(PyUnicode_Check(op)),                \
    422      assert(PyUnicode_IS_READY(op)),             \
    423      ((PyASCIIObject*)op)->state.ascii)
    424 
    425 /* Return true if the string is compact or 0 if not.
    426    No type checks or Ready calls are performed. */
    427 #define PyUnicode_IS_COMPACT(op) \
    428     (((PyASCIIObject*)(op))->state.compact)
    429 
    430 /* Return true if the string is a compact ASCII string (use PyASCIIObject
    431    structure), or 0 if not.  No type checks or Ready calls are performed. */
    432 #define PyUnicode_IS_COMPACT_ASCII(op)                 \
    433     (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
    434 
/* Storage kinds of a unicode object.  The numeric values are the ones
   stored in the 3-bit PyASCIIObject.state.kind field. */
enum PyUnicode_Kind {
    /* String contains only wstr characters.  This is only possible
       when the string was created with a legacy API and _PyUnicode_Ready()
       has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
    /* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
    445 
/* Return a pointer to the canonical representation cast to Py_UCS1,
   Py_UCS2 or Py_UCS4 for direct character access.
   The macros only cast the result of PyUnicode_DATA(); they do not check
   that the kind matches, so use PyUnicode_KIND() beforehand to pick the
   right one. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
    454 
/* Return one of the PyUnicode_*_KIND values defined above, read from
   state.kind.  Asserts that op is a unicode object and is ready; with
   assertions enabled op is evaluated more than once. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
    460 
    461 /* Return a void pointer to the raw unicode buffer. */
    462 #define _PyUnicode_COMPACT_DATA(op)                     \
    463     (PyUnicode_IS_ASCII(op) ?                   \
    464      ((void*)((PyASCIIObject*)(op) + 1)) :              \
    465      ((void*)((PyCompactUnicodeObject*)(op) + 1)))
    466 
    467 #define _PyUnicode_NONCOMPACT_DATA(op)                  \
    468     (assert(((PyUnicodeObject*)(op))->data.any),        \
    469      ((((PyUnicodeObject *)(op))->data.any)))
    470 
    471 #define PyUnicode_DATA(op) \
    472     (assert(PyUnicode_Check(op)), \
    473      PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
    474      _PyUnicode_NONCOMPACT_DATA(op))
    475 
    476 /* In the access macros below, "kind" may be evaluated more than once.
    477    All other macro parameters are evaluated exactly once, so it is safe
    478    to put side effects into them (such as increasing the index). */
    479 
    480 /* Write into the canonical representation, this macro does not do any sanity
    481    checks and is intended for usage in loops.  The caller should cache the
    482    kind and data pointers obtained from other macro calls.
    483    index is the index in the string (starts at 0) and value is the new
    484    code point value which should be written to that location. */
    485 #define PyUnicode_WRITE(kind, data, index, value) \
    486     do { \
    487         switch ((kind)) { \
    488         case PyUnicode_1BYTE_KIND: { \
    489             ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
    490             break; \
    491         } \
    492         case PyUnicode_2BYTE_KIND: { \
    493             ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
    494             break; \
    495         } \
    496         default: { \
    497             assert((kind) == PyUnicode_4BYTE_KIND); \
    498             ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
    499         } \
    500         } \
    501     } while (0)
    502 
    503 /* Read a code point from the string's canonical representation.  No checks
    504    or ready calls are performed. */
    505 #define PyUnicode_READ(kind, data, index) \
    506     ((Py_UCS4) \
    507     ((kind) == PyUnicode_1BYTE_KIND ? \
    508         ((const Py_UCS1 *)(data))[(index)] : \
    509         ((kind) == PyUnicode_2BYTE_KIND ? \
    510             ((const Py_UCS2 *)(data))[(index)] : \
    511             ((const Py_UCS4 *)(data))[(index)] \
    512         ) \
    513     ))
    514 
    515 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
    516    calls PyUnicode_KIND() and might call it twice.  For single reads, use
    517    PyUnicode_READ_CHAR, for multiple consecutive reads callers should
    518    cache kind and use PyUnicode_READ instead. */
    519 #define PyUnicode_READ_CHAR(unicode, index) \
    520     (assert(PyUnicode_Check(unicode)),          \
    521      assert(PyUnicode_IS_READY(unicode)),       \
    522      (Py_UCS4)                                  \
    523         (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
    524             ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
    525             (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
    526                 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
    527                 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
    528             ) \
    529         ))
    530 
/* Returns the length of the unicode string (the `length` field, i.e. the
   number of code points).  The caller has to make sure that the string has
   its canonical representation set before using this macro; call
   PyUnicode_READY() to ensure that.  Both preconditions are only checked
   with assert(). */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
    538 
    539 
    540 /* Fast check to determine whether an object is ready. Equivalent to
    541    PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
    542 
    543 #define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
    544 
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case: when the ready bit is already set it evaluates to 0 without a
   function call.  If the canonical representation is not yet set, it will
   still call _PyUnicode_Ready().
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready((PyObject *)(op))))
    553 
    554 /* Return a maximum character value which is suitable for creating another
    555    string based on op.  This is always an approximation but more efficient
    556    than iterating over the string. */
    557 #define PyUnicode_MAX_CHAR_VALUE(op) \
    558     (assert(PyUnicode_IS_READY(op)),                                    \
    559      (PyUnicode_IS_ASCII(op) ?                                          \
    560       (0x7f) :                                                          \
    561       (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
    562        (0xffU) :                                                        \
    563        (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
    564         (0xffffU) :                                                     \
    565         (0x10ffffU)))))
    566 
    567 #endif
    568 
    569 /* --- Constants ---------------------------------------------------------- */
    570 
/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0.  The constant has type Py_UCS4. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
    577 
    578 /* === Public API ========================================================= */
    579 
    580 /* --- Plain Py_UNICODE --------------------------------------------------- */
    581 
/* With PEP 393, this is the recommended way to allocate a new unicode object.
   This function will allocate the object and its buffer in a single memory
   block.  Objects created using this function are not resizable.

   Not declared when Py_LIMITED_API is defined. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_New(
    Py_ssize_t size,            /* Number of code points in the new string */
    Py_UCS4 maxchar             /* maximum code point value in the string */
    );
#endif
    591 
/* Initializes the canonical string representation from the deprecated
   wstr/Py_UNICODE representation. This function is used to convert Unicode
   objects which were created using the old API to the new flexible format
   introduced with PEP 393.

   Don't call this function directly, use the public PyUnicode_READY() macro
   instead; the macro skips the call when the object is already ready.
   The result is what PyUnicode_READY() documents: 0 on success and -1 on
   errors. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyUnicode_Ready(
    PyObject *unicode           /* Unicode object */
    );
#endif
    604 
/* Get a copy of a Unicode string.
   Private helper; not declared when Py_LIMITED_API is defined.
   NOTE(review): presumably returns a new reference, or NULL on error --
   confirm against the implementation. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
    PyObject *unicode
    );
#endif
    611 
/* Copy characters from one unicode object into another.  This function
   performs character conversion when necessary and falls back to memcpy()
   if possible.

   Fails if *to* is too small (smaller than *how_many* or smaller than
   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
   kind(to), or if *to* has more than 1 reference.

   Returns the number of characters written, or returns -1 and raises an
   exception on error.

   Pseudo-code:

       how_many = min(how_many, len(from) - from_start)
       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
       return how_many

   Note: The function doesn't write a terminating null character.
   */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );

/* Unsafe version of PyUnicode_CopyCharacters(): the arguments are not
   checked, so it may crash if they are invalid (e.g. if the output string
   is too short).  It returns nothing. */
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );
#endif
    650 
#ifndef Py_LIMITED_API
/* Fill a string with a character: write fill_char into
   unicode[start:start+length].

   Fails if fill_char is bigger than the string's maximum character, or if
   the string has more than 1 reference.

   Returns the number of characters written, or returns -1 and raises an
   exception on error. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );

/* Unsafe version of PyUnicode_Fill(): the arguments are not checked, so it
   may crash if they are invalid (e.g. if length is longer than the string).
   It returns nothing. */
PyAPI_FUNC(void) _PyUnicode_FastFill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );
#endif
    676 
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
   size.

   u may be NULL, in which case the contents are undefined. It is the
   user's responsibility to fill in the needed data afterwards. Note
   that modifying the Unicode object contents after construction is
   only allowed if u was set to NULL.

   The buffer is copied into the new object.

   Not declared when Py_LIMITED_API is defined. */

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    const Py_UNICODE *u,        /* Unicode buffer */
    Py_ssize_t size             /* size of buffer */
    );
#endif
    693 
    694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
    695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
    696     const char *u,             /* UTF-8 encoded string */
    697     Py_ssize_t size            /* size of buffer */
    698     );
    699 
    700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
    701    UTF-8 encoded bytes.  The size is determined with strlen(). */
    702 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    703     const char *u              /* UTF-8 encoded string */
    704     );
    705 
    706 #ifndef Py_LIMITED_API
    707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
    708    Scan the string to find the maximum character. */
    709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
    710     int kind,
    711     const void *buffer,
    712     Py_ssize_t size);
    713 
    714 /* Create a new string from a buffer of ASCII characters.
    715    WARNING: Don't check if the string contains any non-ASCII character. */
    716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
    717     const char *buffer,
    718     Py_ssize_t size);
    719 #endif
    720 
    721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    722 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
    723     PyObject *str,
    724     Py_ssize_t start,
    725     Py_ssize_t end);
    726 #endif
    727 
    728 #ifndef Py_LIMITED_API
    729 /* Compute the maximum character of the substring unicode[start:end].
    730    Return 127 for an empty string. */
    731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
    732     PyObject *unicode,
    733     Py_ssize_t start,
    734     Py_ssize_t end);
    735 #endif
    736 
    737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    738 /* Copy the string into a UCS4 buffer including the null character if copy_null
    739    is set. Return NULL and raise an exception on error. Raise a SystemError if
    740    the buffer is smaller than the string. Return buffer on success.
    741 
    742    buflen is the length of the buffer in (Py_UCS4) characters. */
    743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
    744     PyObject *unicode,
    745     Py_UCS4* buffer,
    746     Py_ssize_t buflen,
    747     int copy_null);
    748 
    749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
    750    PyMem_Malloc; if this fails, NULL is returned with a memory error
    751    exception set. */
    752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
    753 #endif
    754 
    755 /* Return a read-only pointer to the Unicode object's internal
    756    Py_UNICODE buffer.
    757    If the wchar_t/Py_UNICODE representation is not yet available, this
    758    function will calculate it. */
    759 
    760 #ifndef Py_LIMITED_API
    761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    762     PyObject *unicode           /* Unicode object */
    763     );
    764 #endif
    765 
    766 /* Return a read-only pointer to the Unicode object's internal
    767    Py_UNICODE buffer and save the length at size.
    768    If the wchar_t/Py_UNICODE representation is not yet available, this
    769    function will calculate it. */
    770 
    771 #ifndef Py_LIMITED_API
    772 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
    773     PyObject *unicode,          /* Unicode object */
    774     Py_ssize_t *size            /* location where to save the length */
    775     );
    776 #endif
    777 
    778 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    779 /* Get the length of the Unicode object. */
    780 
    781 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
    782     PyObject *unicode
    783 );
    784 #endif
    785 
    786 /* Get the number of Py_UNICODE units in the
    787    string representation. */
    788 
    789 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
    790     PyObject *unicode           /* Unicode object */
    791     );
    792 
    793 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    794 /* Read a character from the string. */
    795 
    796 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
    797     PyObject *unicode,
    798     Py_ssize_t index
    799     );
    800 
    801 /* Write a character to the string. The string must have been created through
    802    PyUnicode_New, must not be shared, and must not have been hashed yet.
    803 
    804    Return 0 on success, -1 on error. */
    805 
    806 PyAPI_FUNC(int) PyUnicode_WriteChar(
    807     PyObject *unicode,
    808     Py_ssize_t index,
    809     Py_UCS4 character
    810     );
    811 #endif
    812 
    813 #ifndef Py_LIMITED_API
    814 /* Get the maximum ordinal for a Unicode character. */
    815 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
    816 #endif
    817 
    818 /* Resize a Unicode object. The length is the number of characters, except
    819    if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
    820    is the number of Py_UNICODE characters.
    821 
    822    *unicode is modified to point to the new (resized) object and 0
    823    returned on success.
    824 
    825    Try to resize the string in place (which is usually faster than allocating
    826    a new string and copying characters), or create a new string.
    827 
    828    Error handling is implemented as follows: an exception is set, -1
    829    is returned and *unicode left untouched.
    830 
    831    WARNING: The function doesn't check string content, the result may not be a
    832             string in canonical representation. */
    833 
    834 PyAPI_FUNC(int) PyUnicode_Resize(
    835     PyObject **unicode,         /* Pointer to the Unicode object */
    836     Py_ssize_t length           /* New length */
    837     );
    838 
    839 /* Decode obj to a Unicode object.
    840 
    841    bytes, bytearray and other bytes-like objects are decoded according to the
    842    given encoding and error handler. The encoding and error handler can be
    843    NULL to have the interface use UTF-8 and "strict".
    844 
    845    All other objects (including Unicode objects) raise an exception.
    846 
    847    The API returns NULL in case of an error. The caller is responsible
    848    for decref'ing the returned objects.
    849 
    850 */
    851 
    852 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
    853     PyObject *obj,              /* Object */
    854     const char *encoding,       /* encoding */
    855     const char *errors          /* error handling */
    856     );
    857 
    858 /* Copy an instance of a Unicode subtype to a new true Unicode object if
    859    necessary. If obj is already a true Unicode object (not a subtype), return
    860    the reference with *incremented* refcount.
    861 
    862    The API returns NULL in case of an error. The caller is responsible
    863    for decref'ing the returned objects.
    864 
    865 */
    866 
    867 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
    868     PyObject *obj      /* Object */
    869     );
    870 
    871 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
    872     const char *format,   /* ASCII-encoded string  */
    873     va_list vargs
    874     );
    875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
    876     const char *format,   /* ASCII-encoded string  */
    877     ...
    878     );
    879 
    880 #ifndef Py_LIMITED_API
    881 typedef struct {                /* state shared by the _PyUnicodeWriter_*() API below */
    882     PyObject *buffer;           /* the writer's buffer object; cleared by _PyUnicodeWriter_Finish() and _PyUnicodeWriter_Dealloc() */
    883     void *data;                 /* NOTE(review): presumably the raw character storage of buffer -- confirm against the implementation */
    884     enum PyUnicode_Kind kind;   /* current kind; _PyUnicodeWriter_PrepareKind() does nothing for a KIND <= this value */
    885     Py_UCS4 maxchar;            /* _PyUnicodeWriter_Prepare() takes its fast path only for MAXCHAR <= this value */
    886     Py_ssize_t size;            /* size - pos is the room _PyUnicodeWriter_Prepare() treats as already available */
    887     Py_ssize_t pos;             /* subtracted from size by _PyUnicodeWriter_Prepare(); NOTE(review): presumably the write position -- confirm */
    888 
    889     /* minimum number of allocated characters (default: 0) */
    890     Py_ssize_t min_length;
    891 
    892     /* minimum character (default: 127, ASCII) */
    893     Py_UCS4 min_char;
    894 
    895     /* If non-zero, overallocate the buffer (default: 0). */
    896     unsigned char overallocate;
    897 
    898     /* If readonly is 1, buffer is a shared string (cannot be modified)
    899        and size is set to 0. */
    900     unsigned char readonly;
    901 } _PyUnicodeWriter ;
    902 
    903 /* Initialize a Unicode writer.
    904  *
    905  * By default, the minimum buffer size is 0 characters and overallocation is
    906  * disabled. Set min_length, min_char and overallocate attributes to control
    907  * the allocation of the buffer. */
    908 PyAPI_FUNC(void)
    909 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
    910 
    911 /* Prepare the buffer to write LENGTH characters
    912    with the specified maximum character MAXCHAR.
    913 
           Fast path: the macro evaluates to 0 without calling
           _PyUnicodeWriter_PrepareInternal() when MAXCHAR <= WRITER->maxchar and
           LENGTH <= WRITER->size - WRITER->pos, or when LENGTH is 0.
           WRITER, LENGTH and MAXCHAR can be evaluated more than once, so pass
           expressions without side effects.

    914    Return 0 on success, raise an exception and return -1 on error. */
    915 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    916     (((MAXCHAR) <= (WRITER)->maxchar                                  \
    917       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
    918      ? 0                                                              \
    919      : (((LENGTH) == 0)                                               \
    920         ? 0                                                           \
    921         : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
    922 
    923 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
    924    instead. */
    925 PyAPI_FUNC(int)
    926 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
    927                                  Py_ssize_t length, Py_UCS4 maxchar);
    928 
    929 /* Prepare the buffer to have at least the kind KIND.
    930    For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
    931    support characters in range U+0000-U+FFFF.
    932 
           KIND must not be PyUnicode_WCHAR_KIND (checked with assert()).  The macro
           evaluates to 0 without calling _PyUnicodeWriter_PrepareKindInternal()
           when KIND <= WRITER->kind.  WRITER and KIND can be evaluated more than
           once, so pass expressions without side effects.

    933    Return 0 on success, raise an exception and return -1 on error. */
    934 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    935     (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
    936      (KIND) <= (WRITER)->kind                                         \
    937      ? 0                                                              \
    938      : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
    939 
    940 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
    941    macro instead. */
    942 PyAPI_FUNC(int)
    943 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
    944                                      enum PyUnicode_Kind kind);
    945 
    946 /* Append a Unicode character.
    947    Return 0 on success, raise an exception and return -1 on error. */
    948 PyAPI_FUNC(int)
    949 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
    950     Py_UCS4 ch
    951     );
    952 
    953 /* Append a Unicode string.
    954    Return 0 on success, raise an exception and return -1 on error. */
    955 PyAPI_FUNC(int)
    956 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
    957     PyObject *str               /* Unicode string */
    958     );
    959 
    960 /* Append a substring of a Unicode string.
    961    Return 0 on success, raise an exception and return -1 on error. */
    962 PyAPI_FUNC(int)
    963 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
    964     PyObject *str,              /* Unicode string */
    965     Py_ssize_t start,
    966     Py_ssize_t end
    967     );
    968 
    969 /* Append an ASCII-encoded byte string.
    970    Return 0 on success, raise an exception and return -1 on error. */
    971 PyAPI_FUNC(int)
    972 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
    973     const char *str,           /* ASCII-encoded byte string */
    974     Py_ssize_t len             /* number of bytes, or -1 if unknown */
    975     );
    976 
    977 /* Append a latin1-encoded byte string.
    978    Return 0 on success, raise an exception and return -1 on error. */
    979 PyAPI_FUNC(int)
    980 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
    981     const char *str,           /* latin1-encoded byte string */
    982     Py_ssize_t len             /* length in bytes */
    983     );
    984 
    985 /* Get the value of the writer as a Unicode string. Clear the
    986    buffer of the writer. Raise an exception and return NULL
    987    on error. */
    988 PyAPI_FUNC(PyObject *)
    989 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
    990 
    991 /* Deallocate memory of a writer (clear its internal buffer). */
    992 PyAPI_FUNC(void)
    993 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
    994 #endif
    995 
    996 #ifndef Py_LIMITED_API
    997 /* Format the object based on the format_spec, as defined in PEP 3101
    998    (Advanced String Formatting). */
    999 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
   1000     _PyUnicodeWriter *writer,
   1001     PyObject *obj,
   1002     PyObject *format_spec,
   1003     Py_ssize_t start,
   1004     Py_ssize_t end);
   1005 #endif
   1006 
   1007 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
   1008 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
   1009 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
   1010     const char *u              /* UTF-8 encoded string */
   1011     );
   1012 #ifndef Py_LIMITED_API
   1013 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
   1014 #endif
   1015 
   1016 /* Evaluate to the state.interned field of op.  op is cast to
           PyASCIIObject * without any type check, so:
           Use only if you know it's a string */
   1017 #define PyUnicode_CHECK_INTERNED(op) \
   1018     (((PyASCIIObject *)(op))->state.interned)
   1019 
   1020 /* --- wchar_t support for platforms which support it --------------------- */
   1021 
   1022 #ifdef HAVE_WCHAR_H
   1023 
   1024 /* Create a Unicode Object from the wchar_t buffer w of the given
   1025    size.
   1026 
   1027    The buffer is copied into the new object. */
   1028 
   1029 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
   1030     const wchar_t *w,           /* wchar_t buffer */
   1031     Py_ssize_t size             /* size of buffer */
   1032     );
   1033 
   1034 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
   1035    most size wchar_t characters are copied.
   1036 
   1037    Note that the resulting wchar_t string may or may not be
   1038    0-terminated.  It is the responsibility of the caller to make sure
   1039    that the wchar_t string is 0-terminated in case this is required by
   1040    the application.
   1041 
   1042    Returns the number of wchar_t characters copied (excluding a
   1043    possibly trailing 0-termination character) or -1 in case of an
   1044    error. */
   1045 
   1046 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
   1047     PyObject *unicode,          /* Unicode object */
   1048     wchar_t *w,                 /* wchar_t buffer */
   1049     Py_ssize_t size             /* size of buffer */
   1050     );
   1051 
   1052 /* Convert the Unicode object to a wide character string. The output string
   1053    always ends with a nul character. If size is not NULL, write the number of
   1054    wide characters (excluding the null character) into *size.
   1055 
   1056    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
   1057    on success. On error, returns NULL, *size is undefined and raises a
   1058    MemoryError. */
   1059 
   1060 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
   1061     PyObject *unicode,          /* Unicode object */
   1062     Py_ssize_t *size            /* number of characters of the result */
   1063     );
   1064 
   1065 #ifndef Py_LIMITED_API
   1066 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
   1067 #endif
   1068 
   1069 #endif
   1070 
   1071 /* --- Unicode ordinals --------------------------------------------------- */
   1072 
   1073 /* Create a Unicode Object from the given Unicode code point ordinal.
   1074 
   1075    The ordinal must be in range(0x110000). A ValueError is
   1076    raised in case it is not.
   1077 
   1078 */
   1079 
   1080 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
   1081 
   1082 /* --- Free-list management ----------------------------------------------- */
   1083 
   1084 /* Clear the free list used by the Unicode implementation.
   1085 
   1086    This can be used to release memory used for objects on the free
   1087    list back to the Python memory allocator.
   1088 
   1089 */
   1090 
   1091 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
   1092 
   1093 /* === Builtin Codecs =====================================================
   1094 
   1095    Many of these APIs take two arguments encoding and errors. These
   1096    parameters encoding and errors have the same semantics as the ones
   1097    of the builtin str() API.
   1098 
   1099    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
   1100 
   1101    Error handling is set by errors which may also be set to NULL
   1102    meaning to use the default handling defined for the codec. Default
   1103    error handling for all builtin codecs is "strict" (ValueErrors are
   1104    raised).
   1105 
   1106    The codecs all use a similar interface. Only deviations from the
   1107    generic ones are documented.
   1108 
   1109 */
   1110 
   1111 /* --- Manage the default encoding ---------------------------------------- */
   1112 
   1113 /* Returns a pointer to the default encoding (UTF-8) of the
   1114    Unicode object unicode and the size of the encoded representation
   1115    in bytes stored in *size.
   1116 
   1117    In case of an error, *size is not set.
   1118 
   1119    This function caches the UTF-8 encoded string in the unicodeobject
   1120    and subsequent calls will return the same string.  The memory is released
   1121    when the unicodeobject is deallocated.
   1122 
   1123    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
   1124    support the previous internal function with the same behaviour.
   1125 
   1126    *** This API is for interpreter INTERNAL USE ONLY and will likely
   1127    *** be removed or changed in the future.
   1128 
   1129    *** If you need to access the Unicode object as UTF-8 bytes string,
   1130    *** please use PyUnicode_AsUTF8String() instead.
   1131 */
   1132 
   1133 #ifndef Py_LIMITED_API
   1134 PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
   1135     PyObject *unicode,
   1136     Py_ssize_t *size);
   1137 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
   1138 #endif
   1139 
   1140 /* Returns a pointer to the default encoding (UTF-8) of the
   1141    Unicode object unicode.
   1142 
   1143    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
   1144    in the unicodeobject.
   1145 
   1146    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
   1147    support the previous internal function with the same behaviour.
   1148 
   1149    Use of this API is DEPRECATED since no size information can be
   1150    extracted from the returned data.
   1151 
   1152    *** This API is for interpreter INTERNAL USE ONLY and will likely
   1153    *** be removed or changed in the future.
   1154 
   1155    *** If you need to access the Unicode object as UTF-8 bytes string,
   1156    *** please use PyUnicode_AsUTF8String() instead.
   1157 
   1158 */
   1159 
   1160 #ifndef Py_LIMITED_API
   1161 PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
   1162 #define _PyUnicode_AsString PyUnicode_AsUTF8
   1163 #endif
   1164 
   1165 /* Returns "utf-8".  */
   1166 
   1167 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
   1168 
   1169 /* --- Generic Codecs ----------------------------------------------------- */
   1170 
   1171 /* Create a Unicode object by decoding the encoded string s of the
   1172    given size. */
   1173 
   1174 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
   1175     const char *s,              /* encoded string */
   1176     Py_ssize_t size,            /* size of buffer */
   1177     const char *encoding,       /* encoding */
   1178     const char *errors          /* error handling */
   1179     );
   1180 
   1181 /* Decode a Unicode object unicode and return the result as Python
   1182    object.
   1183 
   1184    This API is DEPRECATED. The only supported standard encoding is rot13.
   1185    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   1186    that decode from str. */
   1187 
   1188 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
   1189     PyObject *unicode,          /* Unicode object */
   1190     const char *encoding,       /* encoding */
   1191     const char *errors          /* error handling */
   1192     ) Py_DEPRECATED(3.6);
   1193 
   1194 /* Decode a Unicode object unicode and return the result as Unicode
   1195    object.
   1196 
   1197    This API is DEPRECATED. The only supported standard encoding is rot13.
   1198    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   1199    that decode from str to str. */
   1200 
   1201 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
   1202     PyObject *unicode,          /* Unicode object */
   1203     const char *encoding,       /* encoding */
   1204     const char *errors          /* error handling */
   1205     ) Py_DEPRECATED(3.6);
   1206 
   1207 /* Encodes a Py_UNICODE buffer of the given size and returns a
   1208    Python string object. */
   1209 
   1210 #ifndef Py_LIMITED_API
   1211 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
   1212     const Py_UNICODE *s,        /* Unicode char buffer */
   1213     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
   1214     const char *encoding,       /* encoding */
   1215     const char *errors          /* error handling */
   1216     );
   1217 #endif
   1218 
   1219 /* Encodes a Unicode object and returns the result as Python
   1220    object.
   1221 
   1222    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
   1223    since all standard encodings (except rot13) encode str to bytes.
   1224    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
   1225    that encode from str to non-bytes. */
   1226 
   1227 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
   1228     PyObject *unicode,          /* Unicode object */
   1229     const char *encoding,       /* encoding */
   1230     const char *errors          /* error handling */
   1231     ) Py_DEPRECATED(3.6);
   1232 
   1233 /* Encodes a Unicode object and returns the result as Python string
   1234    object. */
   1235 
   1236 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
   1237     PyObject *unicode,          /* Unicode object */
   1238     const char *encoding,       /* encoding */
   1239     const char *errors          /* error handling */
   1240     );
   1241 
   1242 /* Encodes a Unicode object and returns the result as Unicode
   1243    object.
   1244 
   1245    This API is DEPRECATED.  The only supported standard encoding is rot13.
   1246    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
   1247    that encode from str to str. */
   1248 
   1249 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
   1250     PyObject *unicode,          /* Unicode object */
   1251     const char *encoding,       /* encoding */
   1252     const char *errors          /* error handling */
   1253     ) Py_DEPRECATED(3.6);
   1254 
   1255 /* Build an encoding map. */
   1256 
   1257 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
   1258     PyObject* string            /* 256 character map */
   1259    );
   1260 
   1261 /* --- UTF-7 Codecs ------------------------------------------------------- */
   1262 
   1263 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
   1264     const char *string,         /* UTF-7 encoded string */
   1265     Py_ssize_t length,          /* size of string */
   1266     const char *errors          /* error handling */
   1267     );
   1268 
   1269 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
   1270     const char *string,         /* UTF-7 encoded string */
   1271     Py_ssize_t length,          /* size of string */
   1272     const char *errors,         /* error handling */
   1273     Py_ssize_t *consumed        /* bytes consumed */
   1274     );
   1275 
   1276 #ifndef Py_LIMITED_API
   1277 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
   1278     const Py_UNICODE *data,     /* Unicode char buffer */
   1279     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1280     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
   1281     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
   1282     const char *errors          /* error handling */
   1283     );
   1284 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
   1285     PyObject *unicode,          /* Unicode object */
   1286     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
   1287     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
   1288     const char *errors          /* error handling */
   1289     );
   1290 #endif
   1291 
   1292 /* --- UTF-8 Codecs ------------------------------------------------------- */
   1293 
   1294 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
   1295     const char *string,         /* UTF-8 encoded string */
   1296     Py_ssize_t length,          /* size of string */
   1297     const char *errors          /* error handling */
   1298     );
   1299 
   1300 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
   1301     const char *string,         /* UTF-8 encoded string */
   1302     Py_ssize_t length,          /* size of string */
   1303     const char *errors,         /* error handling */
   1304     Py_ssize_t *consumed        /* bytes consumed */
   1305     );
   1306 
   1307 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
   1308     PyObject *unicode           /* Unicode object */
   1309     );
   1310 
   1311 #ifndef Py_LIMITED_API
   1312 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
   1313     PyObject *unicode,
   1314     const char *errors);
   1315 
   1316 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
   1317     const Py_UNICODE *data,     /* Unicode char buffer */
   1318     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1319     const char *errors          /* error handling */
   1320     );
   1321 #endif
   1322 
   1323 /* --- UTF-32 Codecs ------------------------------------------------------ */
   1324 
   1325 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
   1326    the corresponding Unicode object.
   1327 
   1328    errors (if non-NULL) defines the error handling. It defaults
   1329    to "strict".
   1330 
   1331    If byteorder is non-NULL, the decoder starts decoding using the
   1332    given byte order:
   1333 
   1334     *byteorder == -1: little endian
   1335     *byteorder == 0:  native order
   1336     *byteorder == 1:  big endian
   1337 
   1338    In native mode, the first four bytes of the stream are checked for a
   1339    BOM mark. If found, the BOM mark is analysed, the byte order
   1340    adjusted and the BOM skipped.  In the other modes, no BOM mark
   1341    interpretation is done. After completion, *byteorder is set to the
   1342    current byte order at the end of input data.
   1343 
   1344    If byteorder is NULL, the codec starts in native order mode.
   1345 
   1346 */
   1347 
   1348 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
   1349     const char *string,         /* UTF-32 encoded string */
   1350     Py_ssize_t length,          /* size of string */
   1351     const char *errors,         /* error handling */
   1352     int *byteorder              /* pointer to byteorder to use
   1353                                    0=native;-1=LE,1=BE; updated on
   1354                                    exit */
   1355     );
   1356 
   1357 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
   1358     const char *string,         /* UTF-32 encoded string */
   1359     Py_ssize_t length,          /* size of string */
   1360     const char *errors,         /* error handling */
   1361     int *byteorder,             /* pointer to byteorder to use
   1362                                    0=native;-1=LE,1=BE; updated on
   1363                                    exit */
   1364     Py_ssize_t *consumed        /* bytes consumed */
   1365     );
   1366 
   1367 /* Returns a Python string using the UTF-32 encoding in native byte
   1368    order. The string always starts with a BOM mark.  */
   1369 
   1370 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
   1371     PyObject *unicode           /* Unicode object */
   1372     );
   1373 
   1374 /* Returns a Python string object holding the UTF-32 encoded value of
   1375    the Unicode data.
   1376 
   1377    The output is written according to the following
   1378    byte order:
   1379 
   1380    byteorder == -1: little endian
   1381    byteorder == 0:  native byte order (writes a BOM mark)
   1382    byteorder == 1:  big endian
   1383 
   1384    If byteorder is 0, the output string will always start with the
   1385    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   1386    prepended.
   1387 
   1388 */
   1389 
   1390 #ifndef Py_LIMITED_API
   1391 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
   1392     const Py_UNICODE *data,     /* Unicode char buffer */
   1393     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1394     const char *errors,         /* error handling */
   1395     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1396     );
   1397 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
   1398     PyObject *object,           /* Unicode object */
   1399     const char *errors,         /* error handling */
   1400     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1401     );
   1402 #endif
   1403 
   1404 /* --- UTF-16 Codecs ------------------------------------------------------ */
   1405 
   1406 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
   1407    the corresponding Unicode object.
   1408 
   1409    errors (if non-NULL) defines the error handling. It defaults
   1410    to "strict".
   1411 
   1412    If byteorder is non-NULL, the decoder starts decoding using the
   1413    given byte order:
   1414 
   1415     *byteorder == -1: little endian
   1416     *byteorder == 0:  native order
   1417     *byteorder == 1:  big endian
   1418 
   1419    In native mode, the first two bytes of the stream are checked for a
   1420    BOM mark. If found, the BOM mark is analysed, the byte order
   1421    adjusted and the BOM skipped.  In the other modes, no BOM mark
   1422    interpretation is done. After completion, *byteorder is set to the
   1423    current byte order at the end of input data.
   1424 
   1425    If byteorder is NULL, the codec starts in native order mode.
   1426 
   1427 */
   1428 
   1429 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
   1430     const char *string,         /* UTF-16 encoded string */
   1431     Py_ssize_t length,          /* size of string */
   1432     const char *errors,         /* error handling */
   1433     int *byteorder              /* pointer to byteorder to use
   1434                                    0=native;-1=LE,1=BE; updated on
   1435                                    exit */
   1436     );
   1437 
   1438 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
   1439     const char *string,         /* UTF-16 encoded string */
   1440     Py_ssize_t length,          /* size of string */
   1441     const char *errors,         /* error handling */
   1442     int *byteorder,             /* pointer to byteorder to use
   1443                                    0=native;-1=LE,1=BE; updated on
   1444                                    exit */
   1445     Py_ssize_t *consumed        /* bytes consumed */
   1446     );
   1447 
   1448 /* Returns a Python bytes object using the UTF-16 encoding in native byte
   1449    order. The output always starts with a BOM.  */
   1450 
   1451 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
   1452     PyObject *unicode           /* Unicode object */
   1453     );
   1454 
   1455 /* Returns a Python string object holding the UTF-16 encoded value of
   1456    the Unicode data.
   1457 
   1458    Output is written according to the value of byteorder, which selects
   1459    the byte order as follows:
   1460 
   1461    byteorder == -1: little endian
   1462    byteorder == 0:  native byte order (writes a BOM mark)
   1463    byteorder == 1:  big endian
   1464 
   1465    If byteorder is 0, the output string will always start with the
   1466    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   1467    prepended.
   1468 
   1469    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
   1470    UCS-2. This trick makes it possible to add full UTF-16 capabilities
   1471    at a later point without compromising the APIs.
   1472 
   1473 */
   1474 
   1475 #ifndef Py_LIMITED_API
   1476 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
   1477     const Py_UNICODE *data,     /* Unicode char buffer */
   1478     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1479     const char *errors,         /* error handling */
   1480     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1481     );
   1482 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
   1483     PyObject* unicode,          /* Unicode object */
   1484     const char *errors,         /* error handling */
   1485     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1486     );
   1487 #endif
   1488 
   1489 /* --- Unicode-Escape Codecs ---------------------------------------------- */
   1490 
   1491 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
   1492     const char *string,         /* Unicode-Escape encoded string */
   1493     Py_ssize_t length,          /* size of string */
   1494     const char *errors          /* error handling */
   1495     );
   1496 
   1497 #ifndef Py_LIMITED_API
   1498 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
   1499    chars. */
   1500 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
   1501         const char *string,     /* Unicode-Escape encoded string */
   1502         Py_ssize_t length,      /* size of string */
   1503         const char *errors,     /* error handling */
   1504         const char **first_invalid_escape  /* on return, points to first
   1505                                               invalid escaped char in
   1506                                               string. */
   1507 );
   1508 #endif
   1509 
   1510 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
   1511     PyObject *unicode           /* Unicode object */
   1512     );
   1513 
   1514 #ifndef Py_LIMITED_API
   1515 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
   1516     const Py_UNICODE *data,     /* Unicode char buffer */
   1517     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
   1518     );
   1519 #endif
   1520 
   1521 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
   1522 
   1523 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
   1524     const char *string,         /* Raw-Unicode-Escape encoded string */
   1525     Py_ssize_t length,          /* size of string */
   1526     const char *errors          /* error handling */
   1527     );
   1528 
   1529 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
   1530     PyObject *unicode           /* Unicode object */
   1531     );
   1532 
   1533 #ifndef Py_LIMITED_API
   1534 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
   1535     const Py_UNICODE *data,     /* Unicode char buffer */
   1536     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
   1537     );
   1538 #endif
   1539 
   1540 /* --- Unicode Internal Codec ---------------------------------------------
   1541 
   1542     Only for internal use in _codecsmodule.c */
   1543 
   1544 #ifndef Py_LIMITED_API
   1545 PyObject *_PyUnicode_DecodeUnicodeInternal(
   1546     const char *string,         /* Unicode-Internal encoded string */
   1547     Py_ssize_t length,          /* size of string */
   1548     const char *errors          /* error handling */
   1549     );
   1550 #endif
   1551 
   1552 /* --- Latin-1 Codecs -----------------------------------------------------
   1553 
   1554    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
   1555 
   1556 */
   1557 
   1558 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
   1559     const char *string,         /* Latin-1 encoded string */
   1560     Py_ssize_t length,          /* size of string */
   1561     const char *errors          /* error handling */
   1562     );
   1563 
   1564 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
   1565     PyObject *unicode           /* Unicode object */
   1566     );
   1567 
   1568 #ifndef Py_LIMITED_API
   1569 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
   1570     PyObject* unicode,          /* Unicode object */
   1571     const char* errors);        /* error handling */
   1572 
   1573 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
   1574     const Py_UNICODE *data,     /* Unicode char buffer */
   1575     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1576     const char *errors          /* error handling */
   1577     );
   1578 #endif
   1579 
   1580 /* --- ASCII Codecs -------------------------------------------------------
   1581 
   1582    Only 7-bit ASCII data is accepted. All other codes generate errors.
   1583 
   1584 */
   1585 
   1586 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
   1587     const char *string,         /* ASCII encoded string */
   1588     Py_ssize_t length,          /* size of string */
   1589     const char *errors          /* error handling */
   1590     );
   1591 
   1592 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
   1593     PyObject *unicode           /* Unicode object */
   1594     );
   1595 
   1596 #ifndef Py_LIMITED_API
   1597 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
   1598     PyObject* unicode,          /* Unicode object */
   1599     const char* errors);        /* error handling */
   1600 
   1601 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
   1602     const Py_UNICODE *data,     /* Unicode char buffer */
   1603     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1604     const char *errors          /* error handling */
   1605     );
   1606 #endif
   1607 
   1608 /* --- Character Map Codecs -----------------------------------------------
   1609 
   1610    This codec uses mappings to encode and decode characters.
   1611 
   1612    Decoding mappings must map single string characters to single
   1613    Unicode characters, integers (which are then interpreted as Unicode
   1614    ordinals) or None (meaning "undefined mapping" and causing an
   1615    error).
   1616 
   1617    Encoding mappings must map single Unicode characters to single
   1618    string characters, integers (which are then interpreted as Latin-1
   1619    ordinals) or None (meaning "undefined mapping" and causing an
   1620    error).
   1621 
   1622    If a character lookup fails with a LookupError, the character is
   1623    copied as-is meaning that its ordinal value will be interpreted as
   1624    Unicode or Latin-1 ordinal resp. Because of this mappings only need
   1625    to contain those mappings which map characters to different code
   1626    points.
   1627 
   1628 */
   1629 
   1630 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
   1631     const char *string,         /* Encoded string */
   1632     Py_ssize_t length,          /* size of string */
   1633     PyObject *mapping,          /* character mapping
   1634                                    (char ordinal -> unicode ordinal) */
   1635     const char *errors          /* error handling */
   1636     );
   1637 
   1638 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
   1639     PyObject *unicode,          /* Unicode object */
   1640     PyObject *mapping           /* character mapping
   1641                                    (unicode ordinal -> char ordinal) */
   1642     );
   1643 
   1644 #ifndef Py_LIMITED_API
   1645 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
   1646     const Py_UNICODE *data,     /* Unicode char buffer */
   1647     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1648     PyObject *mapping,          /* character mapping
   1649                                    (unicode ordinal -> char ordinal) */
   1650     const char *errors          /* error handling */
   1651     );
   1652 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
   1653     PyObject *unicode,          /* Unicode object */
   1654     PyObject *mapping,          /* character mapping
   1655                                    (unicode ordinal -> char ordinal) */
   1656     const char *errors          /* error handling */
   1657     );
   1658 #endif
   1659 
   1660 /* Translate a Py_UNICODE buffer of the given length by applying a
   1661    character mapping table to it and return the resulting Unicode
   1662    object.
   1663 
   1664    The mapping table must map Unicode ordinal integers to Unicode
   1665    ordinal integers or None (causing deletion of the character).
   1666 
   1667    Mapping tables may be dictionaries or sequences. Unmapped character
   1668    ordinals (ones which cause a LookupError) are left untouched and
   1669    are copied as-is.
   1670 
   1671 */
   1672 
   1673 #ifndef Py_LIMITED_API
   1674 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
   1675     const Py_UNICODE *data,     /* Unicode char buffer */
   1676     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
   1677     PyObject *table,            /* Translate table */
   1678     const char *errors          /* error handling */
   1679     );
   1680 #endif
   1681 
   1682 #ifdef MS_WINDOWS
   1683 
   1684 /* --- MBCS codecs for Windows -------------------------------------------- */
   1685 
   1686 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
   1687     const char *string,         /* MBCS encoded string */
   1688     Py_ssize_t length,          /* size of string */
   1689     const char *errors          /* error handling */
   1690     );
   1691 
   1692 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
   1693     const char *string,         /* MBCS encoded string */
   1694     Py_ssize_t length,          /* size of string */
   1695     const char *errors,         /* error handling */
   1696     Py_ssize_t *consumed        /* bytes consumed */
   1697     );
   1698 
   1699 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1700 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
   1701     int code_page,              /* code page number */
   1702     const char *string,         /* encoded string */
   1703     Py_ssize_t length,          /* size of string */
   1704     const char *errors,         /* error handling */
   1705     Py_ssize_t *consumed        /* bytes consumed */
   1706     );
   1707 #endif
   1708 
   1709 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
   1710     PyObject *unicode           /* Unicode object */
   1711     );
   1712 
   1713 #ifndef Py_LIMITED_API
   1714 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
   1715     const Py_UNICODE *data,     /* Unicode char buffer */
   1716     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1717     const char *errors          /* error handling */
   1718     );
   1719 #endif
   1720 
   1721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1722 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
   1723     int code_page,              /* code page number */
   1724     PyObject *unicode,          /* Unicode object */
   1725     const char *errors          /* error handling */
   1726     );
   1727 #endif
   1728 
   1729 #endif /* MS_WINDOWS */
   1730 
   1731 /* --- Decimal Encoder ---------------------------------------------------- */
   1732 
   1733 /* Takes a Unicode string holding a decimal value and writes it into
   1734    an output buffer using standard ASCII digit codes.
   1735 
   1736    The output buffer has to provide at least length+1 bytes of storage
   1737    area. The output string is 0-terminated.
   1738 
   1739    The encoder converts whitespace to ' ', decimal characters to their
   1740    corresponding ASCII digit and all other Latin-1 characters except
   1741    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
   1742    are treated as errors. This includes embedded null characters.
   1743 
   1744    Error handling is defined by the errors argument:
   1745 
   1746       NULL or "strict": raise a ValueError
   1747       "ignore": ignore the wrong characters (these are not copied to the
   1748                 output buffer)
   1749       "replace": replaces illegal characters with '?'
   1750 
   1751    Returns 0 on success, -1 on failure.
   1752 
   1753 */
   1754 
   1755 #ifndef Py_LIMITED_API
   1756 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
   1757     Py_UNICODE *s,              /* Unicode buffer */
   1758     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1759     char *output,               /* Output buffer; must have size >= length+1 */
   1760     const char *errors          /* error handling */
   1761     );
   1762 #endif
   1763 
   1764 /* Transforms code points that have decimal digit property to the
   1765    corresponding ASCII digit code points.
   1766 
   1767    Returns a new Unicode string on success, NULL on failure.
   1768 */
   1769 
   1770 #ifndef Py_LIMITED_API
   1771 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
   1772     Py_UNICODE *s,              /* Unicode buffer */
   1773     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
   1774     );
   1775 #endif
   1776 
   1777 /* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject
   1778    as argument instead of a raw buffer and length.  This function additionally
   1779    transforms spaces to ASCII because this is what the callers in longobject,
   1780    floatobject, and complexobject did anyways. */
   1781 
   1782 #ifndef Py_LIMITED_API
   1783 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
   1784     PyObject *unicode           /* Unicode object */
   1785     );
   1786 #endif
   1787 
   1788 /* --- Locale encoding --------------------------------------------------- */
   1789 
   1790 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1791 /* Decode a string from the current locale encoding. The supported error
   1792    handlers are "strict" and "surrogateescape" (PEP 383); *errors* selects
   1793    which one is used, and NULL means "strict". With "surrogateescape",
   1794    undecodable bytes are escaped, and a byte sequence that would decode to
   1795    a surrogate character is escaped by the error handler instead of being
   1796    decoded. *str* must end with a null character but cannot
   1797    contain embedded null characters. */
   1798 
   1799 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
   1800     const char *str,            /* locale-encoded string */
   1801     Py_ssize_t len,             /* size of str */
   1802     const char *errors);        /* error handling */
   1803 
   1804 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
   1805    length using strlen(). */
   1806 
   1807 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
   1808     const char *str,            /* null-terminated locale-encoded string */
   1809     const char *errors);        /* error handling */
   1810 
   1811 /* Encode a Unicode object to the current locale encoding. The supported
   1812    error handlers are "strict" and "surrogateescape" (PEP 383); NULL means
   1813    "strict". Return a bytes object. The string
   1814    cannot contain embedded null characters. */
   1815 
   1816 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
   1817     PyObject *unicode,          /* Unicode object */
   1818     const char *errors          /* error handling */
   1819     );
   1820 #endif
   1821 
   1822 /* --- File system encoding ---------------------------------------------- */
   1823 
   1824 /* ParseTuple converter: encode str objects to bytes using
   1825    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
   1826 
   1827 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
   1828 
   1829 /* ParseTuple converter: decode bytes objects to unicode using
   1830    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
   1831 
   1832 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
   1833 
   1834 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
   1835    and the "surrogateescape" error handler.
   1836 
   1837    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1838    encoding.
   1839 
   1840    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
   1841 */
   1842 
   1843 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
   1844     const char *s               /* encoded string */
   1845     );
   1846 
   1847 /* Decode a string using Py_FileSystemDefaultEncoding
   1848    and the "surrogateescape" error handler.
   1849 
   1850    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1851    encoding.
   1852 */
   1853 
   1854 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
   1855     const char *s,               /* encoded string */
   1856     Py_ssize_t size              /* size */
   1857     );
   1858 
   1859 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
   1860    "surrogateescape" error handler, and return bytes.
   1861 
   1862    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1863    encoding.
   1864 */
   1865 
   1866 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
   1867     PyObject *unicode           /* Unicode object */
   1868     );
   1869 
   1870 /* --- Methods & Slots ----------------------------------------------------
   1871 
   1872    These are capable of handling Unicode objects and strings on input
   1873    (we refer to them as strings in the descriptions) and return
   1874    Unicode objects or integers as appropriate. */
   1875 
   1876 /* Concat two strings giving a new Unicode string. */
   1877 
   1878 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
   1879     PyObject *left,             /* Left string */
   1880     PyObject *right             /* Right string */
   1881     );
   1882 
   1883 /* Concat two strings and put the result in *pleft
   1884    (sets *pleft to NULL on error) */
   1885 
   1886 PyAPI_FUNC(void) PyUnicode_Append(
   1887     PyObject **pleft,           /* Pointer to left string */
   1888     PyObject *right             /* Right string */
   1889     );
   1890 
   1891 /* Concat two strings, put the result in *pleft and drop the right object
   1892    (sets *pleft to NULL on error) */
   1893 
   1894 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
   1895     PyObject **pleft,           /* Pointer to left string */
   1896     PyObject *right             /* Right string */
   1897     );
   1898 
   1899 /* Split a string giving a list of Unicode strings.
   1900 
   1901    If sep is NULL, splitting will be done at all whitespace
   1902    substrings. Otherwise, splits occur at the given separator.
   1903 
   1904    At most maxsplit splits will be done. If negative, no limit is set.
   1905 
   1906    Separators are not included in the resulting list.
   1907 
   1908 */
   1909 
   1910 PyAPI_FUNC(PyObject*) PyUnicode_Split(
   1911     PyObject *s,                /* String to split */
   1912     PyObject *sep,              /* String separator */
   1913     Py_ssize_t maxsplit         /* Maxsplit count */
   1914     );
   1915 
   1916 /* Ditto, but split at line breaks.
   1917 
   1918    CRLF is considered to be one line break. Line breaks are not
   1919    included in the resulting list unless keepends is true. */
   1920 
   1921 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
   1922     PyObject *s,                /* String to split */
   1923     int keepends                /* If true, line end markers are included */
   1924     );
   1925 
   1926 /* Partition a string using a given separator. */
   1927 
   1928 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
   1929     PyObject *s,                /* String to partition */
   1930     PyObject *sep               /* String separator */
   1931     );
   1932 
   1933 /* Partition a string using a given separator, searching from the end of the
   1934    string. */
   1935 
   1936 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
   1937     PyObject *s,                /* String to partition */
   1938     PyObject *sep               /* String separator */
   1939     );
   1940 
   1941 /* Split a string giving a list of Unicode strings.
   1942 
   1943    If sep is NULL, splitting will be done at all whitespace
   1944    substrings. Otherwise, splits occur at the given separator.
   1945 
   1946    At most maxsplit splits will be done; if maxsplit is negative, no
   1947    limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
   1948    the end of the string.
   1949 
   1950    Separators are not included in the resulting list.
   1951 
   1952 */
   1953 
   1954 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
   1955     PyObject *s,                /* String to split */
   1956     PyObject *sep,              /* String separator */
   1957     Py_ssize_t maxsplit         /* Maxsplit count */
   1958     );
   1959 
   1960 /* Translate a string by applying a character mapping table to it and
   1961    return the resulting Unicode object.
   1962 
   1963    The mapping table must map Unicode ordinal integers to Unicode
   1964    ordinal integers or None (causing deletion of the character).
   1965 
   1966    Mapping tables may be dictionaries or sequences. Unmapped character
   1967    ordinals (ones which cause a LookupError) are left untouched and
   1968    are copied as-is.
   1969 
   1970 */
   1971 
   1972 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
   1973     PyObject *str,              /* String */
   1974     PyObject *table,            /* Translate table */
   1975     const char *errors          /* error handling */
   1976     );
   1977 
   1978 /* Join a sequence of strings using the given separator and return
   1979    the resulting Unicode string. */
   1980 
   1981 PyAPI_FUNC(PyObject*) PyUnicode_Join(
   1982     PyObject *separator,        /* Separator string */
   1983     PyObject *seq               /* Sequence object */
   1984     );
   1985 
   1986 #ifndef Py_LIMITED_API
   1987 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
   1988     PyObject *separator,        /* Separator string */
   1989     PyObject **items,           /* Array of items to join */
   1990     Py_ssize_t seqlen           /* presumably the number of entries in items */
   1991     );
   1992 #endif /* Py_LIMITED_API */
   1993 
   1994 /* Return 1 if substr matches str[start:end] at the given tail end, 0
   1995    otherwise. Return -1 if an error occurred. */
   1996 
   1997 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
   1998     PyObject *str,              /* String */
   1999     PyObject *substr,           /* Prefix or Suffix string */
   2000     Py_ssize_t start,           /* Start index */
   2001     Py_ssize_t end,             /* Stop index */
   2002     int direction               /* Tail end: -1 prefix, +1 suffix */
   2003     );
   2004 
   2005 /* Return the first position of substr in str[start:end] using the
   2006    given search direction or -1 if not found. -2 is returned in case
   2007    an error occurred and an exception is set. */
   2008 
   2009 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
   2010     PyObject *str,              /* String */
   2011     PyObject *substr,           /* Substring to find */
   2012     Py_ssize_t start,           /* Start index */
   2013     Py_ssize_t end,             /* Stop index */
   2014     int direction               /* Find direction: +1 forward, -1 backward */
   2015     );
   2016 
   2017 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   2018 /* Like PyUnicode_Find, but search for single character only. */
   2019 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
   2020     PyObject *str,              /* String */
   2021     Py_UCS4 ch,                 /* Character to find */
   2022     Py_ssize_t start,           /* Start index */
   2023     Py_ssize_t end,             /* Stop index */
   2024     int direction               /* Find direction: +1 forward, -1 backward */
   2025     );
   2026 #endif
   2027 
   2028 /* Count the non-overlapping occurrences of substr in str[start:end]; -1 on error. */
   2029 
   2030 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
   2031     PyObject *str,              /* String */
   2032     PyObject *substr,           /* Substring to count */
   2033     Py_ssize_t start,           /* Start index */
   2034     Py_ssize_t end              /* Stop index */
   2035     );
   2036 
   2037 /* Replace at most maxcount occurrences of substr in str with replstr
   2038    and return the resulting Unicode object. */
   2039 
   2040 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
   2041     PyObject *str,              /* String */
   2042     PyObject *substr,           /* Substring to find */
   2043     PyObject *replstr,          /* Substring to replace */
   2044     Py_ssize_t maxcount         /* Max. number of replacements to apply;
   2045                                    -1 = all */
   2046     );
   2047 
   2048 /* Compare two strings and return -1, 0, 1 for less than, equal,
   2049    greater than resp.
   2050    Raise an exception and return -1 on error. */
   2051 
   2052 PyAPI_FUNC(int) PyUnicode_Compare(
   2053     PyObject *left,             /* Left string */
   2054     PyObject *right             /* Right string */
   2055     );
   2056 
   2057 #ifndef Py_LIMITED_API
   2058 /* Test whether a Unicode object is equal to an ASCII identifier.  Return 1
   2059    if true, 0 otherwise.  The right argument must be an ASCII identifier.
   2060    Any error that occurs inside is cleared before returning. */
   2061 
   2062 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
   2063     PyObject *left,             /* Left string */
   2064     _Py_Identifier *right       /* Right identifier */
   2065     );
   2066 #endif
   2067 
   2068 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
   2069    equal, and greater than, respectively.  It is best to pass only
   2070    ASCII-encoded strings, but the function interprets the input string as
   2071    ISO-8859-1 if it contains non-ASCII characters.
   2072    This function does not raise exceptions. */
   2073 
   2074 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
   2075     PyObject *left,             /* Unicode object */
   2076     const char *right           /* ASCII-encoded string */
   2077     );
   2078 
   2079 #ifndef Py_LIMITED_API
   2080 /* Test whether a Unicode object is equal to an ASCII string.  Return 1 if
   2081    true, 0 otherwise.  The right argument must be an ASCII-encoded string.
   2082    Any error that occurs inside is cleared before returning. */
   2083 
   2084 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
   2085     PyObject *left,             /* Unicode object */
   2086     const char *right           /* ASCII-encoded string */
   2087     );
   2088 #endif
   2089 
   2090 /* Rich compare two strings and return one of the following:
   2091 
   2092    - NULL in case an exception was raised
   2093    - Py_True or Py_False for successful comparisons
   2094    - Py_NotImplemented in case the type combination is unknown
   2095 
   2096    Possible values for op:
   2097 
   2098      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
   2099 
   2100 */
   2101 
   2102 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
   2103     PyObject *left,             /* Left string */
   2104     PyObject *right,            /* Right string */
   2105     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
   2106     );
   2107 
   2108 /* Apply an argument tuple or dictionary to a format string and return
   2109    the resulting Unicode string. */
   2110 
   2111 PyAPI_FUNC(PyObject *) PyUnicode_Format(
   2112     PyObject *format,           /* Format string */
   2113     PyObject *args              /* Argument tuple or dictionary */
   2114     );
   2115 
   2116 /* Checks whether element is contained in container and returns 1/0
   2117    accordingly.
   2118 
   2119    element has to coerce to a one element Unicode string. -1 is
   2120    returned in case of an error. */
   2121 
   2122 PyAPI_FUNC(int) PyUnicode_Contains(
   2123     PyObject *container,        /* Container string */
   2124     PyObject *element           /* Element string */
   2125     );
   2126 
   2127 /* Checks whether argument is a valid identifier. */
   2128 
   2129 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
   2130 
   2131 #ifndef Py_LIMITED_API
   2132 /* Externally visible for str.strip(unicode) */
   2133 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
   2134     PyObject *self,
   2135     int striptype,
   2136     PyObject *sepobj
   2137     );
   2138 #endif
   2139 
   2140 /* Using explicit passed-in values, insert the thousands grouping
   2141    into the string pointed to by buffer.  For the argument descriptions,
   2142    see Objects/stringlib/localeutil.h */
   2143 #ifndef Py_LIMITED_API
   2144 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
   2145     PyObject *unicode,
   2146     Py_ssize_t index,
   2147     Py_ssize_t n_buffer,
   2148     void *digits,
   2149     Py_ssize_t n_digits,
   2150     Py_ssize_t min_width,
   2151     const char *grouping,
   2152     PyObject *thousands_sep,
   2153     Py_UCS4 *maxchar);
   2154 #endif
   2155 /* === Characters Type APIs =============================================== */
   2156 
   2157 /* Helper array used by Py_UNICODE_ISSPACE(). */
   2158 
   2159 #ifndef Py_LIMITED_API
   2160 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
   2161 
   2162 /* These should not be used directly. Use the Py_UNICODE_IS* and
   2163    Py_UNICODE_TO* macros instead.
   2164 
   2165    These APIs are implemented in Objects/unicodectype.c.
   2166 
   2167 */
   2168 
   2169 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
   2170     Py_UCS4 ch       /* Unicode character */
   2171     );
   2172 
   2173 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
   2174     Py_UCS4 ch       /* Unicode character */
   2175     );
   2176 
   2177 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
   2178     Py_UCS4 ch       /* Unicode character */
   2179     );
   2180 
   2181 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
   2182     Py_UCS4 ch       /* Unicode character */
   2183     );
   2184 
   2185 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
   2186     Py_UCS4 ch       /* Unicode character */
   2187     );
   2188 
   2189 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
   2190     const Py_UCS4 ch         /* Unicode character */
   2191     );
   2192 
   2193 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
   2194     const Py_UCS4 ch         /* Unicode character */
   2195     );
   2196 
   2197 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
   2198     Py_UCS4 ch       /* Unicode character */
   2199     );
   2200 
   2201 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
   2202     Py_UCS4 ch       /* Unicode character */
   2203     );
   2204 
   2205 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
   2206     Py_UCS4 ch       /* Unicode character */
   2207     );
   2208 
   /* Conversions of one code point that deliver their result through res, a
      writable pointer supplied by the caller; the return type is int.
      NOTE(review): judging by the names these are the full (possibly
      multi-code-point) lower, title, upper and case-fold mappings. Presumably
      the mapped code points are stored at res and the return value is how
      many were stored. The capacity res must have is not stated here (CPython
      callers appear to pass a 3-element array) -- confirm in
      Objects/unicodectype.c before sizing the buffer. */
   2209 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
   2210     Py_UCS4 ch,       /* Unicode character */
   2211     Py_UCS4 *res      /* caller-supplied output pointer */
   2212     );
   2213 
   2214 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
   2215     Py_UCS4 ch,       /* Unicode character */
   2216     Py_UCS4 *res      /* caller-supplied output pointer */
   2217     );
   2218 
   2219 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
   2220     Py_UCS4 ch,       /* Unicode character */
   2221     Py_UCS4 *res      /* caller-supplied output pointer */
   2222     );
   2223 
   2224 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
   2225     Py_UCS4 ch,       /* Unicode character */
   2226     Py_UCS4 *res      /* caller-supplied output pointer */
   2227     );
   2228 
   /* Property tests on one code point (Py_UCS4 in, int out).
      NOTE(review): the names suggest the Unicode Case_Ignorable and Cased
      properties, with nonzero presumably meaning "has the property" --
      confirm in Objects/unicodectype.c. */
   2229 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
   2230     Py_UCS4 ch         /* Unicode character */
   2231     );
   2232 
   2233 PyAPI_FUNC(int) _PyUnicode_IsCased(
   2234     Py_UCS4 ch         /* Unicode character */
   2235     );
   2236 
   /* Value lookups for one code point. The first two return an int and the
      third returns a double.
      NOTE(review): judging by the names these yield the decimal-digit, digit
      and numeric value of ch. How a code point without such a value is
      reported (presumably a negative result) is not visible here -- confirm
      in Objects/unicodectype.c before relying on a sentinel. */
   2237 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
   2238     Py_UCS4 ch       /* Unicode character */
   2239     );
   2240 
   2241 PyAPI_FUNC(int) _PyUnicode_ToDigit(
   2242     Py_UCS4 ch       /* Unicode character */
   2243     );
   2244 
   2245 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
   2246     Py_UCS4 ch       /* Unicode character */
   2247     );
   2248 
   /* Tests on one code point, passed by value as a Py_UCS4; the answer comes
      back as an int. These are private back ends that callers are meant to
      reach through the Py_UNICODE_IS* macros.
      NOTE(review): judging by the names these test for decimal digit, digit,
      numeric, printable and alphabetic characters, with nonzero presumably
      meaning "has the property" -- confirm in Objects/unicodectype.c. */
   2249 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
   2250     Py_UCS4 ch       /* Unicode character */
   2251     );
   2252 
   2253 PyAPI_FUNC(int) _PyUnicode_IsDigit(
   2254     Py_UCS4 ch       /* Unicode character */
   2255     );
   2256 
   2257 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
   2258     Py_UCS4 ch       /* Unicode character */
   2259     );
   2260 
   2261 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
   2262     Py_UCS4 ch       /* Unicode character */
   2263     );
   2264 
   2265 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
   2266     Py_UCS4 ch       /* Unicode character */
   2267     );
   2268 
   /* Takes a read-only Py_UNICODE pointer and returns a size_t.
      NOTE(review): by name this is the Py_UNICODE counterpart of strlen();
      presumably u must be NUL-terminated and the result counts Py_UNICODE
      units rather than bytes -- confirm against the definition. */
   2269 PyAPI_FUNC(size_t) Py_UNICODE_strlen(
   2270     const Py_UNICODE *u
   2271     );
   2272 
   /* Copy and concatenate helpers. s1 is the writable destination and s2 the
      read-only source; each returns a Py_UNICODE pointer. The strcpy and
      strcat forms take no destination capacity, so the caller has to
      guarantee that s1 is large enough.
      NOTE(review): by name these mirror strcpy(), strcat() and strncpy(). If
      the strncpy form follows the C function, the destination is left without
      a terminator when s2 holds n or more units, and n is presumably counted
      in Py_UNICODE units -- confirm against the definitions. */
   2273 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
   2274     Py_UNICODE *s1,
   2275     const Py_UNICODE *s2);
   2276 
   2277 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
   2278     Py_UNICODE *s1, const Py_UNICODE *s2);
   2279 
   2280 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
   2281     Py_UNICODE *s1,
   2282     const Py_UNICODE *s2,
   2283     size_t n);
   2284 
   /* Comparison helpers. Both inputs are read-only and the result is an int;
      the second form takes an additional size_t limit n.
      NOTE(review): by name these mirror strcmp() and strncmp(); presumably
      the result is negative, zero or positive according to the ordering, and
      n is counted in Py_UNICODE units -- confirm against the definitions. */
   2285 PyAPI_FUNC(int) Py_UNICODE_strcmp(
   2286     const Py_UNICODE *s1,
   2287     const Py_UNICODE *s2
   2288     );
   2289 
   2290 PyAPI_FUNC(int) Py_UNICODE_strncmp(
   2291     const Py_UNICODE *s1,
   2292     const Py_UNICODE *s2,
   2293     size_t n
   2294     );
   2295 
   /* Search helpers. The input s is const-qualified but the returned pointer
      is not, so the qualifier is lost through the return value; do not write
      through the result when s refers to read-only storage.
      NOTE(review): by name these mirror strchr() and strrchr(); presumably
      they return a pointer to the first / last occurrence of c in s, or NULL
      when c does not occur -- confirm against the definitions. */
   2296 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
   2297     const Py_UNICODE *s,
   2298     Py_UNICODE c
   2299     );
   2300 
   2301 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
   2302     const Py_UNICODE *s,
   2303     Py_UNICODE c
   2304     );
   2305 
   /* Takes an object and three ints and returns a PyObject pointer. The
      parameters are unnamed, so their meaning cannot be read off this
      prototype.
      NOTE(review): in CPython's definition the ints appear to be alt, prec
      and type (alternate-form flag, precision, conversion character); naming
      them here would help -- confirm against Objects/unicodeobject.c. */
   2306 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
   2307 
   2308 /* Create a NUL-terminated copy of a Unicode string. On memory allocation
   2309    failure, return NULL with a MemoryError exception set; otherwise return a
   2310    newly allocated buffer (the caller must free it with PyMem_Free()). */
   2311 
   2312 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
   2313     PyObject *unicode      /* string to copy */
   2314     );
   2315 #endif /* Py_LIMITED_API */
   2316 
   /* Only declared when Py_DEBUG is defined and Py_LIMITED_API is not, so any
      call site must sit under the same preprocessor condition.
      NOTE(review): presumably verifies the internal invariants of the unicode
      object op, with check_content asking for the character data to be
      examined as well; the meaning of the int result is not stated here --
      confirm against the definition. */
   2317 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
   2318 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
   2319     PyObject *op,
   2320     int check_content);
   2321 #endif
   2322 
   2323 #ifndef Py_LIMITED_API
   2324 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
   /* NOTE(review): failure is presumably reported as NULL with an exception
      set, and the ownership of the returned reference is not stated here --
      confirm against the definition before adding Py_INCREF or Py_DECREF
      calls around it. */
   2325 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
   2326 /* Clear all static strings. */
   /* Takes no arguments and returns nothing, so it has no way to report a
      failure to the caller.
      NOTE(review): "static strings" presumably means the objects cached for
      _Py_Identifier entries by _PyUnicode_FromId() -- confirm against the
      definition. */
   2327 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
   2328 
   2329 /* Fast equality check when the inputs are known to be exact unicode types
   2330    and where the hash values are equal (i.e. a very probable match). */
   /* Both conditions are preconditions placed on the caller.
      NOTE(review): presumably they are not re-checked inside the function,
      and a nonzero result means "equal" -- confirm against the definition. */
   2331 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
   2332 #endif /* !Py_LIMITED_API */
   2333 
   2334 #ifdef __cplusplus
   2335 }
   2336 #endif
   2337 #endif /* !Py_UNICODEOBJECT_H */
   2338